Buckets:
| { | |
| "next_trial_id": 11, | |
| "trials": { | |
| "0": { | |
| "trial_id": 0, | |
| "strategy": "bottleneck", | |
| "params": { | |
| "elo_min": 1800, | |
| "elo_max": 1900, | |
| "max_games": 4000, | |
| "val_games": 500, | |
| "min_ply": 10, | |
| "total_steps": 100, | |
| "batch_size": 64, | |
| "lr": 0.0003, | |
| "weight_decay": 0.0, | |
| "warmup_frac": 0.05, | |
| "warmup_steps": null, | |
| "max_grad_norm": 1.0, | |
| "patience": 9999, | |
| "eval_interval": null, | |
| "log_interval": 20, | |
| "pause_after_steps": null, | |
| "mate_boost": 0.0, | |
| "no_outcome_token": false, | |
| "discard_ply_limit": false, | |
| "amp_dtype": "float16", | |
| "no_compile": false, | |
| "sdpa_math": true, | |
| "num_workers": 0, | |
| "device": "cuda", | |
| "log_dir": "/workspace/logs/trial_0000", | |
| "hf_repo": null, | |
| "local_checkpoints": true, | |
| "resume": null, | |
| "wandb": false, | |
| "cache_dir": "/dev/shm/pawn_cache", | |
| "run_type": "adapter", | |
| "strategy": "bottleneck", | |
| "checkpoint": "thomas-schweich/pawn-base", | |
| "pgn": "thomas-schweich/pawn-lichess-full", | |
| "adapter_layers": null, | |
| "bottleneck_dim": 16, | |
| "no_adapt_attn": false, | |
| "no_adapt_ffn": false, | |
| "lora_rank": null, | |
| "lora_targets": null, | |
| "lora_ffn": false, | |
| "density": null, | |
| "sparse_targets": null, | |
| "sparse_ffn": false, | |
| "use_output_film": false, | |
| "rosa_mode": null, | |
| "rosa_warmup_steps": 128, | |
| "mask_samples": 32, | |
| "grad_alpha": 2, | |
| "unfreeze_layers": null, | |
| "d_model": null, | |
| "n_layers": null, | |
| "n_heads": null, | |
| "epochs": 9999, | |
| "val_every": 9999 | |
| }, | |
| "cli_command": [ | |
| "python3", | |
| "/opt/pawn/scripts/train.py", | |
| "--config", | |
| "/workspace/logs/trial_0000/run_config.json" | |
| ], | |
| "config": { | |
| "elo_min": 1800, | |
| "elo_max": 1900, | |
| "max_games": 4000, | |
| "val_games": 500, | |
| "min_ply": 10, | |
| "total_steps": 100, | |
| "batch_size": 64, | |
| "lr": 0.0003, | |
| "weight_decay": 0.0, | |
| "warmup_frac": 0.05, | |
| "warmup_steps": null, | |
| "max_grad_norm": 1.0, | |
| "patience": 9999, | |
| "eval_interval": null, | |
| "log_interval": 20, | |
| "pause_after_steps": null, | |
| "mate_boost": 0.0, | |
| "no_outcome_token": false, | |
| "discard_ply_limit": false, | |
| "amp_dtype": "float16", | |
| "no_compile": false, | |
| "sdpa_math": true, | |
| "num_workers": 0, | |
| "device": "cuda", | |
| "log_dir": "/workspace/logs/trial_0000", | |
| "hf_repo": null, | |
| "local_checkpoints": true, | |
| "resume": null, | |
| "wandb": false, | |
| "cache_dir": "/dev/shm/pawn_cache", | |
| "run_type": "adapter", | |
| "strategy": "bottleneck", | |
| "checkpoint": "thomas-schweich/pawn-base", | |
| "pgn": "thomas-schweich/pawn-lichess-full", | |
| "adapter_layers": null, | |
| "bottleneck_dim": 16, | |
| "no_adapt_attn": false, | |
| "no_adapt_ffn": false, | |
| "lora_rank": null, | |
| "lora_targets": null, | |
| "lora_ffn": false, | |
| "density": null, | |
| "sparse_targets": null, | |
| "sparse_ffn": false, | |
| "use_output_film": false, | |
| "rosa_mode": null, | |
| "rosa_warmup_steps": 128, | |
| "mask_samples": 32, | |
| "grad_alpha": 2, | |
| "unfreeze_layers": null, | |
| "d_model": null, | |
| "n_layers": null, | |
| "n_heads": null, | |
| "epochs": 9999, | |
| "val_every": 9999 | |
| }, | |
| "status": "completed", | |
| "pid": 60913, | |
| "gpu_id": 0, | |
| "start_time": 1775346146.9661582, | |
| "end_time": 1775346192.2253375, | |
| "current_step": 100, | |
| "total_steps": 100, | |
| "steps_per_sec": 2.499625056241564, | |
| "last_train_loss": 2.6580834102494304, | |
| "last_train_acc": 0.2578537154243756, | |
| "best_val_loss": 2.6815053782271594, | |
| "best_accuracy": 0.25373134694665184, | |
| "actual_param_count": 262144, | |
| "log_path": "/workspace/sweep_results/trial_0000.log", | |
| "run_dir": "/workspace/logs/trial_0000/bottleneck_20260404_234228_keen-bison", | |
| "optuna_number": null, | |
| "notes": "MCP smoke test: 256k bottleneck, 100 steps, MATH+compile+fp16", | |
| "tags": [ | |
| "mcp-test", | |
| "bottleneck-small" | |
| ], | |
| "eta_seconds": null, | |
| "eta_human": "?", | |
| "elapsed_human": "17h18m" | |
| }, | |
| "1": { | |
| "trial_id": 1, | |
| "strategy": "bottleneck", | |
| "params": { | |
| "elo_min": 1800, | |
| "elo_max": 1900, | |
| "max_games": 4000, | |
| "val_games": 500, | |
| "min_ply": 10, | |
| "total_steps": 30, | |
| "batch_size": 64, | |
| "lr": 0.0003, | |
| "weight_decay": 0.0, | |
| "warmup_frac": 0.05, | |
| "warmup_steps": null, | |
| "max_grad_norm": 1.0, | |
| "patience": 9999, | |
| "eval_interval": null, | |
| "log_interval": 20, | |
| "pause_after_steps": null, | |
| "mate_boost": 0.0, | |
| "no_outcome_token": false, | |
| "discard_ply_limit": false, | |
| "amp_dtype": "float16", | |
| "no_compile": false, | |
| "sdpa_math": true, | |
| "num_workers": 0, | |
| "device": "cuda", | |
| "log_dir": "/workspace/logs/trial_0000", | |
| "hf_repo": null, | |
| "local_checkpoints": true, | |
| "resume": "/workspace/logs/trial_0000/bottleneck_20260404_234228_keen-bison/checkpoints/best", | |
| "wandb": false, | |
| "cache_dir": "/dev/shm/pawn_cache", | |
| "run_type": "adapter", | |
| "strategy": "bottleneck", | |
| "checkpoint": "thomas-schweich/pawn-base", | |
| "pgn": "thomas-schweich/pawn-lichess-full", | |
| "adapter_layers": null, | |
| "bottleneck_dim": 16, | |
| "no_adapt_attn": false, | |
| "no_adapt_ffn": false, | |
| "lora_rank": null, | |
| "lora_targets": null, | |
| "lora_ffn": false, | |
| "density": null, | |
| "sparse_targets": null, | |
| "sparse_ffn": false, | |
| "use_output_film": false, | |
| "rosa_mode": null, | |
| "rosa_warmup_steps": 128, | |
| "mask_samples": 32, | |
| "grad_alpha": 2, | |
| "unfreeze_layers": null, | |
| "d_model": null, | |
| "n_layers": null, | |
| "n_heads": null, | |
| "epochs": 9999, | |
| "val_every": 9999 | |
| }, | |
| "cli_command": [ | |
| "python3", | |
| "/opt/pawn/scripts/train.py", | |
| "--config", | |
| "/workspace/logs/trial_0001/run_config.json" | |
| ], | |
| "config": { | |
| "elo_min": 1800, | |
| "elo_max": 1900, | |
| "max_games": 4000, | |
| "val_games": 500, | |
| "min_ply": 10, | |
| "total_steps": 30, | |
| "batch_size": 64, | |
| "lr": 0.0003, | |
| "weight_decay": 0.0, | |
| "warmup_frac": 0.05, | |
| "warmup_steps": null, | |
| "max_grad_norm": 1.0, | |
| "patience": 9999, | |
| "eval_interval": null, | |
| "log_interval": 20, | |
| "pause_after_steps": null, | |
| "mate_boost": 0.0, | |
| "no_outcome_token": false, | |
| "discard_ply_limit": false, | |
| "amp_dtype": "float16", | |
| "no_compile": false, | |
| "sdpa_math": true, | |
| "num_workers": 0, | |
| "device": "cuda", | |
| "log_dir": "/workspace/logs/trial_0000", | |
| "hf_repo": null, | |
| "local_checkpoints": true, | |
| "resume": "/workspace/logs/trial_0000/bottleneck_20260404_234228_keen-bison/checkpoints/best", | |
| "wandb": false, | |
| "cache_dir": "/dev/shm/pawn_cache", | |
| "run_type": "adapter", | |
| "strategy": "bottleneck", | |
| "checkpoint": "thomas-schweich/pawn-base", | |
| "pgn": "thomas-schweich/pawn-lichess-full", | |
| "adapter_layers": null, | |
| "bottleneck_dim": 16, | |
| "no_adapt_attn": false, | |
| "no_adapt_ffn": false, | |
| "lora_rank": null, | |
| "lora_targets": null, | |
| "lora_ffn": false, | |
| "density": null, | |
| "sparse_targets": null, | |
| "sparse_ffn": false, | |
| "use_output_film": false, | |
| "rosa_mode": null, | |
| "rosa_warmup_steps": 128, | |
| "mask_samples": 32, | |
| "grad_alpha": 2, | |
| "unfreeze_layers": null, | |
| "d_model": null, | |
| "n_layers": null, | |
| "n_heads": null, | |
| "epochs": 9999, | |
| "val_every": 9999 | |
| }, | |
| "status": "killed", | |
| "pid": 61357, | |
| "gpu_id": 0, | |
| "start_time": 1775346206.78868, | |
| "end_time": 1775346209.3660913, | |
| "current_step": 0, | |
| "total_steps": 30, | |
| "steps_per_sec": 0.0, | |
| "last_train_loss": null, | |
| "last_train_acc": null, | |
| "best_val_loss": null, | |
| "best_accuracy": null, | |
| "actual_param_count": null, | |
| "log_path": "/workspace/sweep_results/trial_0001.log", | |
| "run_dir": null, | |
| "optuna_number": null, | |
| "notes": "", | |
| "tags": [ | |
| "mcp-test", | |
| "bottleneck-small" | |
| ], | |
| "eta_seconds": null, | |
| "eta_human": "?", | |
| "elapsed_human": "17h17m" | |
| }, | |
| "2": { | |
| "trial_id": 2, | |
| "strategy": "bottleneck", | |
| "params": { | |
| "elo_min": 1800, | |
| "elo_max": 1900, | |
| "max_games": 3500000, | |
| "val_games": 50000, | |
| "min_ply": 10, | |
| "total_steps": 200000, | |
| "batch_size": 256, | |
| "lr": 0.0003, | |
| "weight_decay": 0.0, | |
| "warmup_frac": 0.02, | |
| "warmup_steps": null, | |
| "max_grad_norm": 1.0, | |
| "patience": 9999, | |
| "eval_interval": 2500, | |
| "log_interval": 50, | |
| "pause_after_steps": 20000, | |
| "mate_boost": 0.0, | |
| "no_outcome_token": false, | |
| "discard_ply_limit": false, | |
| "amp_dtype": "bfloat16", | |
| "no_compile": false, | |
| "sdpa_math": true, | |
| "num_workers": 4, | |
| "device": "cuda", | |
| "log_dir": "/workspace/logs", | |
| "hf_repo": null, | |
| "local_checkpoints": true, | |
| "resume": null, | |
| "wandb": false, | |
| "cache_dir": "/dev/shm/pawn_cache", | |
| "run_type": "adapter", | |
| "strategy": "bottleneck", | |
| "checkpoint": "thomas-schweich/pawn-base", | |
| "pgn": "thomas-schweich/pawn-lichess-full", | |
| "adapter_layers": "4,5,6,7", | |
| "bottleneck_dim": 2440, | |
| "no_adapt_attn": false, | |
| "no_adapt_ffn": false, | |
| "lora_rank": null, | |
| "lora_targets": null, | |
| "lora_ffn": false, | |
| "density": null, | |
| "sparse_targets": null, | |
| "sparse_ffn": false, | |
| "use_output_film": false, | |
| "rosa_mode": null, | |
| "rosa_warmup_steps": 128, | |
| "mask_samples": 32, | |
| "grad_alpha": 2, | |
| "unfreeze_layers": null, | |
| "d_model": null, | |
| "n_layers": null, | |
| "n_heads": null, | |
| "epochs": 9999, | |
| "val_every": 9999 | |
| }, | |
| "cli_command": [ | |
| "python3", | |
| "/opt/pawn/scripts/train.py", | |
| "--config", | |
| "/workspace/logs/trial_0002/run_config.json" | |
| ], | |
| "config": { | |
| "elo_min": 1800, | |
| "elo_max": 1900, | |
| "max_games": 3500000, | |
| "val_games": 50000, | |
| "min_ply": 10, | |
| "total_steps": 200000, | |
| "batch_size": 256, | |
| "lr": 0.0003, | |
| "weight_decay": 0.0, | |
| "warmup_frac": 0.02, | |
| "warmup_steps": null, | |
| "max_grad_norm": 1.0, | |
| "patience": 9999, | |
| "eval_interval": 2500, | |
| "log_interval": 50, | |
| "pause_after_steps": 20000, | |
| "mate_boost": 0.0, | |
| "no_outcome_token": false, | |
| "discard_ply_limit": false, | |
| "amp_dtype": "bfloat16", | |
| "no_compile": false, | |
| "sdpa_math": true, | |
| "num_workers": 4, | |
| "device": "cuda", | |
| "log_dir": "/workspace/logs", | |
| "hf_repo": null, | |
| "local_checkpoints": true, | |
| "resume": null, | |
| "wandb": false, | |
| "cache_dir": "/dev/shm/pawn_cache", | |
| "run_type": "adapter", | |
| "strategy": "bottleneck", | |
| "checkpoint": "thomas-schweich/pawn-base", | |
| "pgn": "thomas-schweich/pawn-lichess-full", | |
| "adapter_layers": "4,5,6,7", | |
| "bottleneck_dim": 2440, | |
| "no_adapt_attn": false, | |
| "no_adapt_ffn": false, | |
| "lora_rank": null, | |
| "lora_targets": null, | |
| "lora_ffn": false, | |
| "density": null, | |
| "sparse_targets": null, | |
| "sparse_ffn": false, | |
| "use_output_film": false, | |
| "rosa_mode": null, | |
| "rosa_warmup_steps": 128, | |
| "mask_samples": 32, | |
| "grad_alpha": 2, | |
| "unfreeze_layers": null, | |
| "d_model": null, | |
| "n_layers": null, | |
| "n_heads": null, | |
| "epochs": 9999, | |
| "val_every": 9999 | |
| }, | |
| "status": "killed", | |
| "pid": 71057, | |
| "gpu_id": 0, | |
| "start_time": 1775351012.9564307, | |
| "end_time": 1775351080.0435607, | |
| "current_step": 0, | |
| "total_steps": 200000, | |
| "steps_per_sec": 0.0, | |
| "last_train_loss": null, | |
| "last_train_acc": null, | |
| "best_val_loss": null, | |
| "best_accuracy": null, | |
| "actual_param_count": null, | |
| "log_path": "/workspace/sweep_results/trial_0002.log", | |
| "run_dir": null, | |
| "optuna_number": null, | |
| "notes": "", | |
| "tags": [ | |
| "18h-push", | |
| "scaling", | |
| "20M" | |
| ], | |
| "eta_seconds": null, | |
| "eta_human": "?", | |
| "elapsed_human": "15h57m" | |
| }, | |
| "3": { | |
| "trial_id": 3, | |
| "strategy": "bottleneck", | |
| "params": { | |
| "elo_min": 1800, | |
| "elo_max": 1900, | |
| "max_games": 30000000, | |
| "val_games": 50000, | |
| "min_ply": 10, | |
| "total_steps": 200000, | |
| "batch_size": 256, | |
| "lr": 0.0003, | |
| "weight_decay": 0.0, | |
| "warmup_frac": 0.02, | |
| "warmup_steps": null, | |
| "max_grad_norm": 1.0, | |
| "patience": 9999, | |
| "eval_interval": 2500, | |
| "log_interval": 50, | |
| "pause_after_steps": 20000, | |
| "mate_boost": 0.0, | |
| "no_outcome_token": false, | |
| "discard_ply_limit": false, | |
| "amp_dtype": "bfloat16", | |
| "no_compile": false, | |
| "sdpa_math": true, | |
| "num_workers": 4, | |
| "device": "cuda", | |
| "log_dir": "/workspace/logs", | |
| "hf_repo": null, | |
| "local_checkpoints": true, | |
| "resume": null, | |
| "wandb": false, | |
| "cache_dir": "/dev/shm/pawn_cache", | |
| "run_type": "adapter", | |
| "strategy": "bottleneck", | |
| "checkpoint": "thomas-schweich/pawn-base", | |
| "pgn": "thomas-schweich/pawn-lichess-full", | |
| "adapter_layers": "4,5,6,7", | |
| "bottleneck_dim": 2440, | |
| "no_adapt_attn": false, | |
| "no_adapt_ffn": false, | |
| "lora_rank": null, | |
| "lora_targets": null, | |
| "lora_ffn": false, | |
| "density": null, | |
| "sparse_targets": null, | |
| "sparse_ffn": false, | |
| "use_output_film": false, | |
| "rosa_mode": null, | |
| "rosa_warmup_steps": 128, | |
| "mask_samples": 32, | |
| "grad_alpha": 2, | |
| "unfreeze_layers": null, | |
| "d_model": null, | |
| "n_layers": null, | |
| "n_heads": null, | |
| "epochs": 9999, | |
| "val_every": 9999 | |
| }, | |
| "cli_command": [ | |
| "python3", | |
| "/opt/pawn/scripts/train.py", | |
| "--config", | |
| "/workspace/logs/trial_0003/run_config.json" | |
| ], | |
| "config": { | |
| "elo_min": 1800, | |
| "elo_max": 1900, | |
| "max_games": 30000000, | |
| "val_games": 50000, | |
| "min_ply": 10, | |
| "total_steps": 200000, | |
| "batch_size": 256, | |
| "lr": 0.0003, | |
| "weight_decay": 0.0, | |
| "warmup_frac": 0.02, | |
| "warmup_steps": null, | |
| "max_grad_norm": 1.0, | |
| "patience": 9999, | |
| "eval_interval": 2500, | |
| "log_interval": 50, | |
| "pause_after_steps": 20000, | |
| "mate_boost": 0.0, | |
| "no_outcome_token": false, | |
| "discard_ply_limit": false, | |
| "amp_dtype": "bfloat16", | |
| "no_compile": false, | |
| "sdpa_math": true, | |
| "num_workers": 4, | |
| "device": "cuda", | |
| "log_dir": "/workspace/logs", | |
| "hf_repo": null, | |
| "local_checkpoints": true, | |
| "resume": null, | |
| "wandb": false, | |
| "cache_dir": "/dev/shm/pawn_cache", | |
| "run_type": "adapter", | |
| "strategy": "bottleneck", | |
| "checkpoint": "thomas-schweich/pawn-base", | |
| "pgn": "thomas-schweich/pawn-lichess-full", | |
| "adapter_layers": "4,5,6,7", | |
| "bottleneck_dim": 2440, | |
| "no_adapt_attn": false, | |
| "no_adapt_ffn": false, | |
| "lora_rank": null, | |
| "lora_targets": null, | |
| "lora_ffn": false, | |
| "density": null, | |
| "sparse_targets": null, | |
| "sparse_ffn": false, | |
| "use_output_film": false, | |
| "rosa_mode": null, | |
| "rosa_warmup_steps": 128, | |
| "mask_samples": 32, | |
| "grad_alpha": 2, | |
| "unfreeze_layers": null, | |
| "d_model": null, | |
| "n_layers": null, | |
| "n_heads": null, | |
| "epochs": 9999, | |
| "val_every": 9999 | |
| }, | |
| "status": "killed", | |
| "pid": 72205, | |
| "gpu_id": 0, | |
| "start_time": 1775351111.2472177, | |
| "end_time": 1775351835.388752, | |
| "current_step": 0, | |
| "total_steps": 200000, | |
| "steps_per_sec": 0.0, | |
| "last_train_loss": null, | |
| "last_train_acc": null, | |
| "best_val_loss": null, | |
| "best_accuracy": null, | |
| "actual_param_count": null, | |
| "log_path": "/workspace/sweep_results/trial_0003.log", | |
| "run_dir": null, | |
| "optuna_number": null, | |
| "notes": "", | |
| "tags": [ | |
| "18h-push", | |
| "scaling", | |
| "20M", | |
| "full-data" | |
| ], | |
| "eta_seconds": null, | |
| "eta_human": "?", | |
| "elapsed_human": "15h55m" | |
| }, | |
| "4": { | |
| "trial_id": 4, | |
| "strategy": "bottleneck", | |
| "params": { | |
| "elo_min": 1800, | |
| "elo_max": 1900, | |
| "max_games": 30000000, | |
| "val_games": 50000, | |
| "min_ply": 10, | |
| "total_steps": 200000, | |
| "batch_size": 256, | |
| "lr": 0.0003, | |
| "weight_decay": 0.0, | |
| "warmup_frac": 0.02, | |
| "warmup_steps": null, | |
| "max_grad_norm": 1.0, | |
| "patience": 9999, | |
| "eval_interval": 2500, | |
| "log_interval": 50, | |
| "pause_after_steps": 15000, | |
| "mate_boost": 0.0, | |
| "no_outcome_token": false, | |
| "discard_ply_limit": false, | |
| "amp_dtype": "bfloat16", | |
| "no_compile": true, | |
| "sdpa_math": true, | |
| "num_workers": 4, | |
| "device": "cuda", | |
| "log_dir": "/workspace/logs", | |
| "hf_repo": null, | |
| "local_checkpoints": true, | |
| "resume": null, | |
| "wandb": false, | |
| "cache_dir": "/dev/shm/pawn_cache", | |
| "run_type": "adapter", | |
| "strategy": "bottleneck", | |
| "checkpoint": "thomas-schweich/pawn-base", | |
| "pgn": "thomas-schweich/pawn-lichess-full", | |
| "adapter_layers": "4,5,6,7", | |
| "bottleneck_dim": 2440, | |
| "no_adapt_attn": false, | |
| "no_adapt_ffn": false, | |
| "lora_rank": null, | |
| "lora_targets": null, | |
| "lora_ffn": false, | |
| "density": null, | |
| "sparse_targets": null, | |
| "sparse_ffn": false, | |
| "use_output_film": false, | |
| "rosa_mode": null, | |
| "rosa_warmup_steps": 128, | |
| "mask_samples": 32, | |
| "grad_alpha": 2, | |
| "unfreeze_layers": null, | |
| "d_model": null, | |
| "n_layers": null, | |
| "n_heads": null, | |
| "epochs": 9999, | |
| "val_every": 9999 | |
| }, | |
| "cli_command": [ | |
| "python3", | |
| "/opt/pawn/scripts/train.py", | |
| "--config", | |
| "/workspace/logs/trial_0004/run_config.json" | |
| ], | |
| "config": { | |
| "elo_min": 1800, | |
| "elo_max": 1900, | |
| "max_games": 30000000, | |
| "val_games": 50000, | |
| "min_ply": 10, | |
| "total_steps": 200000, | |
| "batch_size": 256, | |
| "lr": 0.0003, | |
| "weight_decay": 0.0, | |
| "warmup_frac": 0.02, | |
| "warmup_steps": null, | |
| "max_grad_norm": 1.0, | |
| "patience": 9999, | |
| "eval_interval": 2500, | |
| "log_interval": 50, | |
| "pause_after_steps": 15000, | |
| "mate_boost": 0.0, | |
| "no_outcome_token": false, | |
| "discard_ply_limit": false, | |
| "amp_dtype": "bfloat16", | |
| "no_compile": true, | |
| "sdpa_math": true, | |
| "num_workers": 4, | |
| "device": "cuda", | |
| "log_dir": "/workspace/logs", | |
| "hf_repo": null, | |
| "local_checkpoints": true, | |
| "resume": null, | |
| "wandb": false, | |
| "cache_dir": "/dev/shm/pawn_cache", | |
| "run_type": "adapter", | |
| "strategy": "bottleneck", | |
| "checkpoint": "thomas-schweich/pawn-base", | |
| "pgn": "thomas-schweich/pawn-lichess-full", | |
| "adapter_layers": "4,5,6,7", | |
| "bottleneck_dim": 2440, | |
| "no_adapt_attn": false, | |
| "no_adapt_ffn": false, | |
| "lora_rank": null, | |
| "lora_targets": null, | |
| "lora_ffn": false, | |
| "density": null, | |
| "sparse_targets": null, | |
| "sparse_ffn": false, | |
| "use_output_film": false, | |
| "rosa_mode": null, | |
| "rosa_warmup_steps": 128, | |
| "mask_samples": 32, | |
| "grad_alpha": 2, | |
| "unfreeze_layers": null, | |
| "d_model": null, | |
| "n_layers": null, | |
| "n_heads": null, | |
| "epochs": 9999, | |
| "val_every": 9999 | |
| }, | |
| "status": "killed", | |
| "pid": 73543, | |
| "gpu_id": 0, | |
| "start_time": 1775351848.6815178, | |
| "end_time": 1775352133.8517451, | |
| "current_step": 0, | |
| "total_steps": 200000, | |
| "steps_per_sec": 0.0, | |
| "last_train_loss": null, | |
| "last_train_acc": null, | |
| "best_val_loss": null, | |
| "best_accuracy": null, | |
| "actual_param_count": null, | |
| "log_path": "/workspace/sweep_results/trial_0004.log", | |
| "run_dir": null, | |
| "optuna_number": null, | |
| "notes": "", | |
| "tags": [ | |
| "18h-push", | |
| "scaling", | |
| "20M", | |
| "eager" | |
| ], | |
| "eta_seconds": null, | |
| "eta_human": "?", | |
| "elapsed_human": "15h43m" | |
| }, | |
| "5": { | |
| "trial_id": 5, | |
| "strategy": "bottleneck", | |
| "params": { | |
| "elo_min": 1800, | |
| "elo_max": 1900, | |
| "max_games": 30000000, | |
| "val_games": 50000, | |
| "min_ply": 10, | |
| "total_steps": 200000, | |
| "batch_size": 256, | |
| "lr": 0.0003, | |
| "weight_decay": 0.0, | |
| "warmup_frac": 0.02, | |
| "warmup_steps": null, | |
| "max_grad_norm": 1.0, | |
| "patience": 9999, | |
| "eval_interval": 2500, | |
| "log_interval": 50, | |
| "pause_after_steps": 15000, | |
| "mate_boost": 0.0, | |
| "no_outcome_token": false, | |
| "discard_ply_limit": false, | |
| "amp_dtype": "bfloat16", | |
| "no_compile": true, | |
| "sdpa_math": true, | |
| "num_workers": 2, | |
| "device": "cuda", | |
| "log_dir": "/workspace/logs", | |
| "hf_repo": null, | |
| "local_checkpoints": true, | |
| "resume": null, | |
| "wandb": false, | |
| "cache_dir": "/dev/shm/pawn_cache", | |
| "run_type": "adapter", | |
| "strategy": "bottleneck", | |
| "checkpoint": "thomas-schweich/pawn-base", | |
| "pgn": "thomas-schweich/pawn-lichess-full", | |
| "adapter_layers": "4,5,6,7", | |
| "bottleneck_dim": 2440, | |
| "no_adapt_attn": false, | |
| "no_adapt_ffn": false, | |
| "lora_rank": null, | |
| "lora_targets": null, | |
| "lora_ffn": false, | |
| "density": null, | |
| "sparse_targets": null, | |
| "sparse_ffn": false, | |
| "use_output_film": false, | |
| "rosa_mode": null, | |
| "rosa_warmup_steps": 128, | |
| "mask_samples": 32, | |
| "grad_alpha": 2, | |
| "unfreeze_layers": null, | |
| "d_model": null, | |
| "n_layers": null, | |
| "n_heads": null, | |
| "epochs": 9999, | |
| "val_every": 9999 | |
| }, | |
| "cli_command": [ | |
| "python3", | |
| "/opt/pawn/scripts/train.py", | |
| "--config", | |
| "/workspace/logs/trial_0005/run_config.json" | |
| ], | |
| "config": { | |
| "elo_min": 1800, | |
| "elo_max": 1900, | |
| "max_games": 30000000, | |
| "val_games": 50000, | |
| "min_ply": 10, | |
| "total_steps": 200000, | |
| "batch_size": 256, | |
| "lr": 0.0003, | |
| "weight_decay": 0.0, | |
| "warmup_frac": 0.02, | |
| "warmup_steps": null, | |
| "max_grad_norm": 1.0, | |
| "patience": 9999, | |
| "eval_interval": 2500, | |
| "log_interval": 50, | |
| "pause_after_steps": 15000, | |
| "mate_boost": 0.0, | |
| "no_outcome_token": false, | |
| "discard_ply_limit": false, | |
| "amp_dtype": "bfloat16", | |
| "no_compile": true, | |
| "sdpa_math": true, | |
| "num_workers": 2, | |
| "device": "cuda", | |
| "log_dir": "/workspace/logs", | |
| "hf_repo": null, | |
| "local_checkpoints": true, | |
| "resume": null, | |
| "wandb": false, | |
| "cache_dir": "/dev/shm/pawn_cache", | |
| "run_type": "adapter", | |
| "strategy": "bottleneck", | |
| "checkpoint": "thomas-schweich/pawn-base", | |
| "pgn": "thomas-schweich/pawn-lichess-full", | |
| "adapter_layers": "4,5,6,7", | |
| "bottleneck_dim": 2440, | |
| "no_adapt_attn": false, | |
| "no_adapt_ffn": false, | |
| "lora_rank": null, | |
| "lora_targets": null, | |
| "lora_ffn": false, | |
| "density": null, | |
| "sparse_targets": null, | |
| "sparse_ffn": false, | |
| "use_output_film": false, | |
| "rosa_mode": null, | |
| "rosa_warmup_steps": 128, | |
| "mask_samples": 32, | |
| "grad_alpha": 2, | |
| "unfreeze_layers": null, | |
| "d_model": null, | |
| "n_layers": null, | |
| "n_heads": null, | |
| "epochs": 9999, | |
| "val_every": 9999 | |
| }, | |
| "status": "killed", | |
| "pid": 74605, | |
| "gpu_id": 0, | |
| "start_time": 1775352154.8011737, | |
| "end_time": 1775352288.1328025, | |
| "current_step": 0, | |
| "total_steps": 200000, | |
| "steps_per_sec": 0.0, | |
| "last_train_loss": null, | |
| "last_train_acc": null, | |
| "best_val_loss": null, | |
| "best_accuracy": null, | |
| "actual_param_count": null, | |
| "log_path": "/workspace/sweep_results/trial_0005.log", | |
| "run_dir": null, | |
| "optuna_number": null, | |
| "notes": "", | |
| "tags": [ | |
| "18h-push", | |
| "scaling", | |
| "20M", | |
| "workers2" | |
| ], | |
| "eta_seconds": null, | |
| "eta_human": "?", | |
| "elapsed_human": "15h38m" | |
| }, | |
| "6": { | |
| "trial_id": 6, | |
| "strategy": "bottleneck", | |
| "params": { | |
| "elo_min": 1800, | |
| "elo_max": 1900, | |
| "max_games": 30000000, | |
| "val_games": 5000, | |
| "min_ply": 10, | |
| "total_steps": 100, | |
| "batch_size": 64, | |
| "lr": 0.0003, | |
| "weight_decay": 0.0, | |
| "warmup_frac": 0.02, | |
| "warmup_steps": null, | |
| "max_grad_norm": 1.0, | |
| "patience": 9999, | |
| "eval_interval": 2500, | |
| "log_interval": 20, | |
| "pause_after_steps": null, | |
| "mate_boost": 0.0, | |
| "no_outcome_token": false, | |
| "discard_ply_limit": false, | |
| "amp_dtype": "bfloat16", | |
| "no_compile": true, | |
| "sdpa_math": true, | |
| "num_workers": 2, | |
| "device": "cuda", | |
| "log_dir": "/workspace/logs", | |
| "hf_repo": null, | |
| "local_checkpoints": true, | |
| "resume": null, | |
| "wandb": false, | |
| "cache_dir": "/dev/shm/pawn_cache", | |
| "run_type": "adapter", | |
| "strategy": "bottleneck", | |
| "checkpoint": "thomas-schweich/pawn-base", | |
| "pgn": "thomas-schweich/pawn-lichess-full", | |
| "adapter_layers": "4,5,6,7", | |
| "bottleneck_dim": 2440, | |
| "no_adapt_attn": false, | |
| "no_adapt_ffn": false, | |
| "lora_rank": null, | |
| "lora_targets": null, | |
| "lora_ffn": false, | |
| "density": null, | |
| "sparse_targets": null, | |
| "sparse_ffn": false, | |
| "use_output_film": false, | |
| "rosa_mode": null, | |
| "rosa_warmup_steps": 128, | |
| "mask_samples": 32, | |
| "grad_alpha": 2, | |
| "unfreeze_layers": null, | |
| "d_model": null, | |
| "n_layers": null, | |
| "n_heads": null, | |
| "epochs": 9999, | |
| "val_every": 9999 | |
| }, | |
| "cli_command": [ | |
| "python3", | |
| "/opt/pawn/scripts/train.py", | |
| "--config", | |
| "/workspace/logs/trial_0006/run_config.json" | |
| ], | |
| "config": { | |
| "elo_min": 1800, | |
| "elo_max": 1900, | |
| "max_games": 30000000, | |
| "val_games": 5000, | |
| "min_ply": 10, | |
| "total_steps": 100, | |
| "batch_size": 64, | |
| "lr": 0.0003, | |
| "weight_decay": 0.0, | |
| "warmup_frac": 0.02, | |
| "warmup_steps": null, | |
| "max_grad_norm": 1.0, | |
| "patience": 9999, | |
| "eval_interval": 2500, | |
| "log_interval": 20, | |
| "pause_after_steps": null, | |
| "mate_boost": 0.0, | |
| "no_outcome_token": false, | |
| "discard_ply_limit": false, | |
| "amp_dtype": "bfloat16", | |
| "no_compile": true, | |
| "sdpa_math": true, | |
| "num_workers": 2, | |
| "device": "cuda", | |
| "log_dir": "/workspace/logs", | |
| "hf_repo": null, | |
| "local_checkpoints": true, | |
| "resume": null, | |
| "wandb": false, | |
| "cache_dir": "/dev/shm/pawn_cache", | |
| "run_type": "adapter", | |
| "strategy": "bottleneck", | |
| "checkpoint": "thomas-schweich/pawn-base", | |
| "pgn": "thomas-schweich/pawn-lichess-full", | |
| "adapter_layers": "4,5,6,7", | |
| "bottleneck_dim": 2440, | |
| "no_adapt_attn": false, | |
| "no_adapt_ffn": false, | |
| "lora_rank": null, | |
| "lora_targets": null, | |
| "lora_ffn": false, | |
| "density": null, | |
| "sparse_targets": null, | |
| "sparse_ffn": false, | |
| "use_output_film": false, | |
| "rosa_mode": null, | |
| "rosa_warmup_steps": 128, | |
| "mask_samples": 32, | |
| "grad_alpha": 2, | |
| "unfreeze_layers": null, | |
| "d_model": null, | |
| "n_layers": null, | |
| "n_heads": null, | |
| "epochs": 9999, | |
| "val_every": 9999 | |
| }, | |
| "status": "completed", | |
| "pid": 75207, | |
| "gpu_id": 0, | |
| "start_time": 1775352312.4274437, | |
| "end_time": 1775352342.5313256, | |
| "current_step": 0, | |
| "total_steps": 100, | |
| "steps_per_sec": 0.0, | |
| "last_train_loss": null, | |
| "last_train_acc": null, | |
| "best_val_loss": null, | |
| "best_accuracy": null, | |
| "actual_param_count": null, | |
| "log_path": "/workspace/sweep_results/trial_0006.log", | |
| "run_dir": null, | |
| "optuna_number": null, | |
| "notes": "", | |
| "tags": [ | |
| "diagnostic", | |
| "small-bs" | |
| ], | |
| "eta_seconds": null, | |
| "eta_human": "?", | |
| "elapsed_human": "15h35m" | |
| }, | |
| "7": { | |
| "trial_id": 7, | |
| "strategy": "bottleneck", | |
| "params": { | |
| "elo_min": 1800, | |
| "elo_max": 1900, | |
| "max_games": 30000000, | |
| "val_games": 5000, | |
| "min_ply": 10, | |
| "total_steps": 100, | |
| "batch_size": 128, | |
| "lr": 0.0003, | |
| "weight_decay": 0.0, | |
| "warmup_frac": 0.02, | |
| "warmup_steps": null, | |
| "max_grad_norm": 1.0, | |
| "patience": 9999, | |
| "eval_interval": 2500, | |
| "log_interval": 20, | |
| "pause_after_steps": null, | |
| "mate_boost": 0.0, | |
| "no_outcome_token": false, | |
| "discard_ply_limit": false, | |
| "amp_dtype": "bfloat16", | |
| "no_compile": true, | |
| "sdpa_math": true, | |
| "num_workers": 2, | |
| "device": "cuda", | |
| "log_dir": "/workspace/logs", | |
| "hf_repo": null, | |
| "local_checkpoints": true, | |
| "resume": null, | |
| "wandb": false, | |
| "cache_dir": "/dev/shm/pawn_cache", | |
| "run_type": "adapter", | |
| "strategy": "bottleneck", | |
| "checkpoint": "thomas-schweich/pawn-base", | |
| "pgn": "thomas-schweich/pawn-lichess-full", | |
| "adapter_layers": "4,5,6,7", | |
| "bottleneck_dim": 2440, | |
| "no_adapt_attn": false, | |
| "no_adapt_ffn": false, | |
| "lora_rank": null, | |
| "lora_targets": null, | |
| "lora_ffn": false, | |
| "density": null, | |
| "sparse_targets": null, | |
| "sparse_ffn": false, | |
| "use_output_film": false, | |
| "rosa_mode": null, | |
| "rosa_warmup_steps": 128, | |
| "mask_samples": 32, | |
| "grad_alpha": 2, | |
| "unfreeze_layers": null, | |
| "d_model": null, | |
| "n_layers": null, | |
| "n_heads": null, | |
| "epochs": 9999, | |
| "val_every": 9999 | |
| }, | |
| "cli_command": [ | |
| "python3", | |
| "/opt/pawn/scripts/train.py", | |
| "--config", | |
| "/workspace/logs/trial_0007/run_config.json" | |
| ], | |
| "config": { | |
| "elo_min": 1800, | |
| "elo_max": 1900, | |
| "max_games": 30000000, | |
| "val_games": 5000, | |
| "min_ply": 10, | |
| "total_steps": 100, | |
| "batch_size": 128, | |
| "lr": 0.0003, | |
| "weight_decay": 0.0, | |
| "warmup_frac": 0.02, | |
| "warmup_steps": null, | |
| "max_grad_norm": 1.0, | |
| "patience": 9999, | |
| "eval_interval": 2500, | |
| "log_interval": 20, | |
| "pause_after_steps": null, | |
| "mate_boost": 0.0, | |
| "no_outcome_token": false, | |
| "discard_ply_limit": false, | |
| "amp_dtype": "bfloat16", | |
| "no_compile": true, | |
| "sdpa_math": true, | |
| "num_workers": 2, | |
| "device": "cuda", | |
| "log_dir": "/workspace/logs", | |
| "hf_repo": null, | |
| "local_checkpoints": true, | |
| "resume": null, | |
| "wandb": false, | |
| "cache_dir": "/dev/shm/pawn_cache", | |
| "run_type": "adapter", | |
| "strategy": "bottleneck", | |
| "checkpoint": "thomas-schweich/pawn-base", | |
| "pgn": "thomas-schweich/pawn-lichess-full", | |
| "adapter_layers": "4,5,6,7", | |
| "bottleneck_dim": 2440, | |
| "no_adapt_attn": false, | |
| "no_adapt_ffn": false, | |
| "lora_rank": null, | |
| "lora_targets": null, | |
| "lora_ffn": false, | |
| "density": null, | |
| "sparse_targets": null, | |
| "sparse_ffn": false, | |
| "use_output_film": false, | |
| "rosa_mode": null, | |
| "rosa_warmup_steps": 128, | |
| "mask_samples": 32, | |
| "grad_alpha": 2, | |
| "unfreeze_layers": null, | |
| "d_model": null, | |
| "n_layers": null, | |
| "n_heads": null, | |
| "epochs": 9999, | |
| "val_every": 9999 | |
| }, | |
| "status": "completed", | |
| "pid": 75775, | |
| "gpu_id": 0, | |
| "start_time": 1775352384.985535, | |
| "end_time": 1775352420.1244109, | |
| "current_step": 0, | |
| "total_steps": 100, | |
| "steps_per_sec": 0.0, | |
| "last_train_loss": null, | |
| "last_train_acc": null, | |
| "best_val_loss": null, | |
| "best_accuracy": null, | |
| "actual_param_count": null, | |
| "log_path": "/workspace/sweep_results/trial_0007.log", | |
| "run_dir": null, | |
| "optuna_number": null, | |
| "notes": "", | |
| "tags": [ | |
| "diagnostic", | |
| "bs128" | |
| ], | |
| "eta_seconds": null, | |
| "eta_human": "?", | |
| "elapsed_human": "15h34m" | |
| }, | |
| "8": { | |
| "trial_id": 8, | |
| "strategy": "bottleneck", | |
| "params": { | |
| "elo_min": 1800, | |
| "elo_max": 1900, | |
| "max_games": 30000000, | |
| "val_games": 5000, | |
| "min_ply": 10, | |
| "total_steps": 50, | |
| "batch_size": 256, | |
| "lr": 0.0003, | |
| "weight_decay": 0.0, | |
| "warmup_frac": 0.02, | |
| "warmup_steps": null, | |
| "max_grad_norm": 1.0, | |
| "patience": 9999, | |
| "eval_interval": 2500, | |
| "log_interval": 10, | |
| "pause_after_steps": null, | |
| "mate_boost": 0.0, | |
| "no_outcome_token": false, | |
| "discard_ply_limit": false, | |
| "amp_dtype": "bfloat16", | |
| "no_compile": true, | |
| "sdpa_math": true, | |
| "num_workers": 2, | |
| "device": "cuda", | |
| "log_dir": "/workspace/logs", | |
| "hf_repo": null, | |
| "local_checkpoints": true, | |
| "resume": null, | |
| "wandb": false, | |
| "cache_dir": "/dev/shm/pawn_cache", | |
| "run_type": "adapter", | |
| "strategy": "bottleneck", | |
| "checkpoint": "thomas-schweich/pawn-base", | |
| "pgn": "thomas-schweich/pawn-lichess-full", | |
| "adapter_layers": "4,5,6,7", | |
| "bottleneck_dim": 2440, | |
| "no_adapt_attn": false, | |
| "no_adapt_ffn": false, | |
| "lora_rank": null, | |
| "lora_targets": null, | |
| "lora_ffn": false, | |
| "density": null, | |
| "sparse_targets": null, | |
| "sparse_ffn": false, | |
| "use_output_film": false, | |
| "rosa_mode": null, | |
| "rosa_warmup_steps": 128, | |
| "mask_samples": 32, | |
| "grad_alpha": 2, | |
| "unfreeze_layers": null, | |
| "d_model": null, | |
| "n_layers": null, | |
| "n_heads": null, | |
| "epochs": 9999, | |
| "val_every": 9999 | |
| }, | |
| "cli_command": [ | |
| "python3", | |
| "/opt/pawn/scripts/train.py", | |
| "--config", | |
| "/workspace/logs/trial_0008/run_config.json" | |
| ], | |
| "config": { | |
| "elo_min": 1800, | |
| "elo_max": 1900, | |
| "max_games": 30000000, | |
| "val_games": 5000, | |
| "min_ply": 10, | |
| "total_steps": 50, | |
| "batch_size": 256, | |
| "lr": 0.0003, | |
| "weight_decay": 0.0, | |
| "warmup_frac": 0.02, | |
| "warmup_steps": null, | |
| "max_grad_norm": 1.0, | |
| "patience": 9999, | |
| "eval_interval": 2500, | |
| "log_interval": 10, | |
| "pause_after_steps": null, | |
| "mate_boost": 0.0, | |
| "no_outcome_token": false, | |
| "discard_ply_limit": false, | |
| "amp_dtype": "bfloat16", | |
| "no_compile": true, | |
| "sdpa_math": true, | |
| "num_workers": 2, | |
| "device": "cuda", | |
| "log_dir": "/workspace/logs", | |
| "hf_repo": null, | |
| "local_checkpoints": true, | |
| "resume": null, | |
| "wandb": false, | |
| "cache_dir": "/dev/shm/pawn_cache", | |
| "run_type": "adapter", | |
| "strategy": "bottleneck", | |
| "checkpoint": "thomas-schweich/pawn-base", | |
| "pgn": "thomas-schweich/pawn-lichess-full", | |
| "adapter_layers": "4,5,6,7", | |
| "bottleneck_dim": 2440, | |
| "no_adapt_attn": false, | |
| "no_adapt_ffn": false, | |
| "lora_rank": null, | |
| "lora_targets": null, | |
| "lora_ffn": false, | |
| "density": null, | |
| "sparse_targets": null, | |
| "sparse_ffn": false, | |
| "use_output_film": false, | |
| "rosa_mode": null, | |
| "rosa_warmup_steps": 128, | |
| "mask_samples": 32, | |
| "grad_alpha": 2, | |
| "unfreeze_layers": null, | |
| "d_model": null, | |
| "n_layers": null, | |
| "n_heads": null, | |
| "epochs": 9999, | |
| "val_every": 9999 | |
| }, | |
| "status": "completed", | |
| "pid": 76342, | |
| "gpu_id": 0, | |
| "start_time": 1775352440.4786208, | |
| "end_time": 1775352475.6319711, | |
| "current_step": 0, | |
| "total_steps": 50, | |
| "steps_per_sec": 0.0, | |
| "last_train_loss": null, | |
| "last_train_acc": null, | |
| "best_val_loss": null, | |
| "best_accuracy": null, | |
| "actual_param_count": null, | |
| "log_path": "/workspace/sweep_results/trial_0008.log", | |
| "run_dir": null, | |
| "optuna_number": null, | |
| "notes": "", | |
| "tags": [ | |
| "diagnostic", | |
| "bs256" | |
| ], | |
| "eta_seconds": null, | |
| "eta_human": "?", | |
| "elapsed_human": "15h33m" | |
| }, | |
| "9": { | |
| "trial_id": 9, | |
| "strategy": "bottleneck", | |
| "params": { | |
| "elo_min": 1800, | |
| "elo_max": 1900, | |
| "max_games": 30000000, | |
| "val_games": 50000, | |
| "min_ply": 10, | |
| "total_steps": 200000, | |
| "batch_size": 256, | |
| "lr": 0.0003, | |
| "weight_decay": 0.0, | |
| "warmup_frac": 0.02, | |
| "warmup_steps": null, | |
| "max_grad_norm": 1.0, | |
| "patience": 9999, | |
| "eval_interval": 2500, | |
| "log_interval": 10, | |
| "pause_after_steps": 15000, | |
| "mate_boost": 0.0, | |
| "no_outcome_token": false, | |
| "discard_ply_limit": false, | |
| "amp_dtype": "bfloat16", | |
| "no_compile": true, | |
| "sdpa_math": true, | |
| "num_workers": 2, | |
| "device": "cuda", | |
| "log_dir": "/workspace/logs", | |
| "hf_repo": null, | |
| "local_checkpoints": true, | |
| "resume": null, | |
| "wandb": false, | |
| "cache_dir": "/dev/shm/pawn_cache", | |
| "run_type": "adapter", | |
| "strategy": "bottleneck", | |
| "checkpoint": "thomas-schweich/pawn-base", | |
| "pgn": "thomas-schweich/pawn-lichess-full", | |
| "adapter_layers": "4,5,6,7", | |
| "bottleneck_dim": 2440, | |
| "no_adapt_attn": false, | |
| "no_adapt_ffn": false, | |
| "lora_rank": null, | |
| "lora_targets": null, | |
| "lora_ffn": false, | |
| "density": null, | |
| "sparse_targets": null, | |
| "sparse_ffn": false, | |
| "use_output_film": false, | |
| "rosa_mode": null, | |
| "rosa_warmup_steps": 128, | |
| "mask_samples": 32, | |
| "grad_alpha": 2, | |
| "unfreeze_layers": null, | |
| "d_model": null, | |
| "n_layers": null, | |
| "n_heads": null, | |
| "epochs": 9999, | |
| "val_every": 9999 | |
| }, | |
| "cli_command": [ | |
| "python3", | |
| "/opt/pawn/scripts/train.py", | |
| "--config", | |
| "/workspace/logs/trial_0009/run_config.json" | |
| ], | |
| "config": { | |
| "elo_min": 1800, | |
| "elo_max": 1900, | |
| "max_games": 30000000, | |
| "val_games": 50000, | |
| "min_ply": 10, | |
| "total_steps": 200000, | |
| "batch_size": 256, | |
| "lr": 0.0003, | |
| "weight_decay": 0.0, | |
| "warmup_frac": 0.02, | |
| "warmup_steps": null, | |
| "max_grad_norm": 1.0, | |
| "patience": 9999, | |
| "eval_interval": 2500, | |
| "log_interval": 10, | |
| "pause_after_steps": 15000, | |
| "mate_boost": 0.0, | |
| "no_outcome_token": false, | |
| "discard_ply_limit": false, | |
| "amp_dtype": "bfloat16", | |
| "no_compile": true, | |
| "sdpa_math": true, | |
| "num_workers": 2, | |
| "device": "cuda", | |
| "log_dir": "/workspace/logs", | |
| "hf_repo": null, | |
| "local_checkpoints": true, | |
| "resume": null, | |
| "wandb": false, | |
| "cache_dir": "/dev/shm/pawn_cache", | |
| "run_type": "adapter", | |
| "strategy": "bottleneck", | |
| "checkpoint": "thomas-schweich/pawn-base", | |
| "pgn": "thomas-schweich/pawn-lichess-full", | |
| "adapter_layers": "4,5,6,7", | |
| "bottleneck_dim": 2440, | |
| "no_adapt_attn": false, | |
| "no_adapt_ffn": false, | |
| "lora_rank": null, | |
| "lora_targets": null, | |
| "lora_ffn": false, | |
| "density": null, | |
| "sparse_targets": null, | |
| "sparse_ffn": false, | |
| "use_output_film": false, | |
| "rosa_mode": null, | |
| "rosa_warmup_steps": 128, | |
| "mask_samples": 32, | |
| "grad_alpha": 2, | |
| "unfreeze_layers": null, | |
| "d_model": null, | |
| "n_layers": null, | |
| "n_heads": null, | |
| "epochs": 9999, | |
| "val_every": 9999 | |
| }, | |
| "status": "killed", | |
| "pid": 77221, | |
| "gpu_id": 0, | |
| "start_time": 1775352573.273321, | |
| "end_time": 1775352817.925155, | |
| "current_step": 0, | |
| "total_steps": 200000, | |
| "steps_per_sec": 0.0, | |
| "last_train_loss": null, | |
| "last_train_acc": null, | |
| "best_val_loss": null, | |
| "best_accuracy": null, | |
| "actual_param_count": null, | |
| "log_path": "/workspace/sweep_results/trial_0009.log", | |
| "run_dir": null, | |
| "optuna_number": null, | |
| "notes": "", | |
| "tags": [ | |
| "18h-push", | |
| "scaling", | |
| "20M", | |
| "bs256" | |
| ], | |
| "eta_seconds": null, | |
| "eta_human": "?", | |
| "elapsed_human": "15h31m" | |
| }, | |
| "10": { | |
| "trial_id": 10, | |
| "strategy": "bottleneck", | |
| "params": { | |
| "elo_min": 1800, | |
| "elo_max": 1900, | |
| "max_games": 30000000, | |
| "val_games": 50000, | |
| "min_ply": 10, | |
| "total_steps": 200000, | |
| "batch_size": 256, | |
| "lr": 0.0003, | |
| "weight_decay": 0.0, | |
| "warmup_frac": 0.02, | |
| "warmup_steps": null, | |
| "max_grad_norm": 1.0, | |
| "patience": 9999, | |
| "eval_interval": 2500, | |
| "log_interval": 100, | |
| "pause_after_steps": 15000, | |
| "mate_boost": 0.0, | |
| "no_outcome_token": false, | |
| "discard_ply_limit": false, | |
| "amp_dtype": "bfloat16", | |
| "no_compile": true, | |
| "sdpa_math": true, | |
| "num_workers": 2, | |
| "device": "cuda", | |
| "log_dir": "/workspace/logs", | |
| "hf_repo": null, | |
| "local_checkpoints": true, | |
| "resume": null, | |
| "wandb": false, | |
| "cache_dir": "/dev/shm/pawn_cache", | |
| "run_type": "adapter", | |
| "strategy": "bottleneck", | |
| "checkpoint": "thomas-schweich/pawn-base", | |
| "pgn": "thomas-schweich/pawn-lichess-full", | |
| "adapter_layers": "4,5,6,7", | |
| "bottleneck_dim": 2440, | |
| "no_adapt_attn": false, | |
| "no_adapt_ffn": false, | |
| "lora_rank": null, | |
| "lora_targets": null, | |
| "lora_ffn": false, | |
| "density": null, | |
| "sparse_targets": null, | |
| "sparse_ffn": false, | |
| "use_output_film": false, | |
| "rosa_mode": null, | |
| "rosa_warmup_steps": 128, | |
| "mask_samples": 32, | |
| "grad_alpha": 2, | |
| "unfreeze_layers": null, | |
| "d_model": null, | |
| "n_layers": null, | |
| "n_heads": null, | |
| "epochs": 9999, | |
| "val_every": 9999 | |
| }, | |
| "cli_command": [ | |
| "python3", | |
| "/opt/pawn/scripts/train.py", | |
| "--config", | |
| "/workspace/logs/trial_0010/run_config.json" | |
| ], | |
| "config": { | |
| "elo_min": 1800, | |
| "elo_max": 1900, | |
| "max_games": 30000000, | |
| "val_games": 50000, | |
| "min_ply": 10, | |
| "total_steps": 200000, | |
| "batch_size": 256, | |
| "lr": 0.0003, | |
| "weight_decay": 0.0, | |
| "warmup_frac": 0.02, | |
| "warmup_steps": null, | |
| "max_grad_norm": 1.0, | |
| "patience": 9999, | |
| "eval_interval": 2500, | |
| "log_interval": 100, | |
| "pause_after_steps": 15000, | |
| "mate_boost": 0.0, | |
| "no_outcome_token": false, | |
| "discard_ply_limit": false, | |
| "amp_dtype": "bfloat16", | |
| "no_compile": true, | |
| "sdpa_math": true, | |
| "num_workers": 2, | |
| "device": "cuda", | |
| "log_dir": "/workspace/logs", | |
| "hf_repo": null, | |
| "local_checkpoints": true, | |
| "resume": null, | |
| "wandb": false, | |
| "cache_dir": "/dev/shm/pawn_cache", | |
| "run_type": "adapter", | |
| "strategy": "bottleneck", | |
| "checkpoint": "thomas-schweich/pawn-base", | |
| "pgn": "thomas-schweich/pawn-lichess-full", | |
| "adapter_layers": "4,5,6,7", | |
| "bottleneck_dim": 2440, | |
| "no_adapt_attn": false, | |
| "no_adapt_ffn": false, | |
| "lora_rank": null, | |
| "lora_targets": null, | |
| "lora_ffn": false, | |
| "density": null, | |
| "sparse_targets": null, | |
| "sparse_ffn": false, | |
| "use_output_film": false, | |
| "rosa_mode": null, | |
| "rosa_warmup_steps": 128, | |
| "mask_samples": 32, | |
| "grad_alpha": 2, | |
| "unfreeze_layers": null, | |
| "d_model": null, | |
| "n_layers": null, | |
| "n_heads": null, | |
| "epochs": 9999, | |
| "val_every": 9999 | |
| }, | |
| "status": "completed", | |
| "pid": 78502, | |
| "gpu_id": 0, | |
| "start_time": 1775352861.543256, | |
| "end_time": 1775356243.7400186, | |
| "current_step": 0, | |
| "total_steps": 200000, | |
| "steps_per_sec": 0.0, | |
| "last_train_loss": null, | |
| "last_train_acc": null, | |
| "best_val_loss": null, | |
| "best_accuracy": null, | |
| "actual_param_count": null, | |
| "log_path": "/workspace/sweep_results/trial_0010.log", | |
| "run_dir": null, | |
| "optuna_number": null, | |
| "notes": "", | |
| "tags": [ | |
| "18h-push", | |
| "20M", | |
| "bottleneck", | |
| "scaling" | |
| ], | |
| "eta_seconds": null, | |
| "eta_human": "?", | |
| "elapsed_human": "15h26m" | |
| } | |
| }, | |
| "event_seq": 27, | |
| "start_time": 1775342775.318943, | |
| "cost_per_hour": 2.49 | |
| } |
Xet Storage Details
- Size:
- 47.4 kB
- Xet hash:
- 1953023a05efa261201d13ec6f0839a775e1f7d503b91f10a20f3e23754ccab3
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.