KokosDev committed on
Commit ·
bc702e3
1
Parent(s): 6f23d58
Major update: Add MLP→CLT mappings + retrained transcoders
Browse files
- Added mapping_L{0-30}.pt files with MLP→CLT correlation mappings
- Retrained all 31 transcoders with improved training (5000 steps each)
- Updated README with comprehensive usage documentation
- Added training_summary.json with detailed metrics
- All layers achieve 0% dead features
- Sparsity: 2-29% L0 (layer-dependent; deeper layers are denser, i.e. a larger fraction of features is active)
- Reconstruction loss: <0.1 for most layers
Key features:
✅ MLP→CLT co-activation based mapping for feature attribution
✅ Decoder weights for CLT→MLP reconstruction
✅ Complete training metadata and metrics
✅ Ready for circuit-tracer style attribution analysis
This view is limited to 50 files because it contains too many changes. See raw diff
- .gitattributes +0 -34
- README.md +190 -64
- config.json +0 -40
- transcoder_L0.safetensors → mapping_L0.pt +2 -2
- transcoder_L1.safetensors → mapping_L1.pt +2 -2
- transcoder_L10.safetensors → mapping_L10.pt +2 -2
- transcoder_L11.safetensors → mapping_L11.pt +2 -2
- mapping_L12.pt +3 -0
- mapping_L13.pt +3 -0
- mapping_L14.pt +3 -0
- mapping_L15.pt +3 -0
- mapping_L16.pt +3 -0
- mapping_L17.pt +3 -0
- mapping_L18.pt +3 -0
- mapping_L19.pt +3 -0
- mapping_L2.pt +3 -0
- mapping_L20.pt +3 -0
- mapping_L21.pt +3 -0
- mapping_L22.pt +3 -0
- mapping_L23.pt +3 -0
- mapping_L24.pt +3 -0
- mapping_L25.pt +3 -0
- mapping_L26.pt +3 -0
- mapping_L27.pt +3 -0
- mapping_L28.pt +3 -0
- mapping_L29.pt +3 -0
- mapping_L3.pt +3 -0
- mapping_L30.pt +3 -0
- mapping_L4.pt +3 -0
- mapping_L5.pt +3 -0
- mapping_L6.pt +3 -0
- mapping_L7.pt +3 -0
- mapping_L8.pt +3 -0
- mapping_L9.pt +3 -0
- metrics/metrics_L0.json +0 -185
- metrics/metrics_L1.json +0 -185
- metrics/metrics_L10.json +0 -185
- metrics/metrics_L11.json +0 -185
- metrics/metrics_L12.json +0 -185
- metrics/metrics_L13.json +0 -185
- metrics/metrics_L14.json +0 -185
- metrics/metrics_L15.json +0 -185
- metrics/metrics_L16.json +0 -185
- metrics/metrics_L17.json +0 -185
- metrics/metrics_L18.json +0 -185
- metrics/metrics_L19.json +0 -185
- metrics/metrics_L2.json +0 -185
- metrics/metrics_L20.json +0 -185
- metrics/metrics_L21.json +0 -185
- metrics/metrics_L22.json +0 -185
.gitattributes
CHANGED
|
@@ -1,35 +1 @@
|
|
| 1 |
-
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
-
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
-
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
*.pt filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
README.md
CHANGED
|
@@ -1,101 +1,227 @@
|
|
| 1 |
-
---
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
-
|
| 9 |
-
-
|
| 10 |
-
-
|
| 11 |
-
-
|
| 12 |
-
-
|
| 13 |
-
|
| 14 |
-
pipeline_tag: feature-extraction
|
| 15 |
-
datasets:
|
| 16 |
-
- liuhaotian/LLaVA-Instruct-150K
|
| 17 |
---
|
| 18 |
|
| 19 |
-
#
|
| 20 |
|
| 21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
-
|
| 24 |
-
- Mode: CLT (Cross-Layer Transcoder)
|
| 25 |
- Hidden dim: 4096
|
| 26 |
-
- Feature dim: 8192 (
|
| 27 |
-
-
|
| 28 |
-
-
|
| 29 |
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
```python
|
| 33 |
-
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
-
#
|
|
|
|
|
|
|
|
|
|
| 41 |
import torch.nn as nn
|
|
|
|
| 42 |
class Transcoder(nn.Module):
|
| 43 |
-
def __init__(self, hidden_dim
|
| 44 |
super().__init__()
|
| 45 |
self.enc = nn.Sequential(
|
| 46 |
nn.LayerNorm(hidden_dim),
|
| 47 |
nn.Linear(hidden_dim, feature_dim),
|
| 48 |
)
|
| 49 |
self.dec = nn.Linear(feature_dim, hidden_dim)
|
| 50 |
-
|
|
|
|
| 51 |
z_pre = self.enc(x)
|
| 52 |
z = torch.relu(z_pre)
|
| 53 |
y_hat = self.dec(z)
|
| 54 |
-
return y_hat, z
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
```
|
| 60 |
|
| 61 |
-
##
|
|
|
|
|
|
|
| 62 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
```
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
```
|
| 77 |
|
| 78 |
-
|
|
|
|
|
|
|
| 79 |
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
- Optimizer: AdamW, LR=3e-4, 5000 steps per layer
|
| 83 |
-
- AMP: bf16/fp16 depending on hardware
|
| 84 |
-
- TopK sparsity: ~12% target (achieved ~11–12% on later layers)
|
| 85 |
|
| 86 |
-
##
|
|
|
|
| 87 |
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
- Activations captured from `llava-hf/llava-1.5-7b-hf` at seq_len=512, then consolidated into per-layer batch files for CLT training
|
| 91 |
|
| 92 |
-
##
|
|
|
|
| 93 |
|
| 94 |
-
-
|
| 95 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
|
| 97 |
## License
|
| 98 |
|
| 99 |
-
Apache
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
|
|
|
|
|
|
|
|
|
|
| 101 |
|
|
|
|
| 1 |
+
# LLaVA-1.5-7B Cross-Layer Transcoders (CLTs)
|
| 2 |
+
|
| 3 |
+
## Overview
|
| 4 |
+
|
| 5 |
+
This repository contains **Cross-Layer Transcoders (CLTs)** trained on [llava-hf/llava-1.5-7b-hf](https://huggingface.co/llava-hf/llava-1.5-7b-hf) for mechanistic interpretability research. CLTs are sparse autoencoders that decompose dense MLP activations into interpretable features, enabling attribution analysis and feature steering in vision-language models.
|
| 6 |
+
|
| 7 |
+
### Key Features
|
| 8 |
+
- ✅ **31 layers of transcoders** (L0-L30) covering all LLaVA language model layers
|
| 9 |
+
- ✅ **MLP→CLT mappings** for every layer (co-activation based correlation)
|
| 10 |
+
- ✅ **Decoder weights** for CLT→MLP reconstruction
|
| 11 |
+
- ✅ **0% dead features** across all layers
|
| 12 |
+
- ✅ **2-5% average sparsity** in early/middle layers (interpretable and efficient)
|
| 13 |
+
|
|
|
|
|
|
|
|
|
|
| 14 |
---
|
| 15 |
|
| 16 |
+
## Architecture
|
| 17 |
|
| 18 |
+
```
|
| 19 |
+
Input (MLP hidden state): [batch, seq_len, 4096]
|
| 20 |
+
↓
|
| 21 |
+
Transcoder Encoder: LayerNorm + Linear(4096 → 8192) + ReLU
|
| 22 |
+
↓
|
| 23 |
+
Sparse Features: [batch, seq_len, 8192] (~2-29% active, layer-dependent)
|
| 24 |
+
↓
|
| 25 |
+
Transcoder Decoder: Linear(8192 → 4096)
|
| 26 |
+
↓
|
| 27 |
+
Output (MLP reconstruction): [batch, seq_len, 4096]
|
| 28 |
+
```
|
| 29 |
|
| 30 |
+
**Parameters per layer:**
|
|
|
|
| 31 |
- Hidden dim: 4096
|
| 32 |
+
- Feature dim: 8192 (2× expansion)
|
| 33 |
+
- Total parameters per transcoder: ~67M
|
| 34 |
+
- Sparsity: 2-29% L0 (fraction of features active; layer-dependent, with deeper layers being denser)
|
| 35 |
|
| 36 |
+
---
|
| 37 |
+
|
| 38 |
+
## Training Details
|
| 39 |
+
|
| 40 |
+
- **Model**: `llava-hf/llava-1.5-7b-hf`
|
| 41 |
+
- **Dataset**: ~45K multimodal samples (Flickr30K + instruction tasks)
|
| 42 |
+
- **Steps per layer**: 5,000
|
| 43 |
+
- **Learning rate**: 3e-4 (AdamW)
|
| 44 |
+
- **Batch size**: 16 samples
|
| 45 |
+
- **Sparsity penalty**: 0.01 (L1 on features)
|
| 46 |
+
- **Validation**: Every 200 steps
|
| 47 |
+
|
| 48 |
+
### Training Quality Metrics
|
| 49 |
+
|
| 50 |
+
| Layer Range | Avg Sparsity (L0%) | Avg Reconstruction Loss | Dead Features |
|
| 51 |
+
|-------------|-------------------|------------------------|---------------|
|
| 52 |
+
| L0-L10 | 2-4% | 0.05-0.15 | 0% |
|
| 53 |
+
| L11-L18 | 3-7% | 0.05-0.10 | 0% |
|
| 54 |
+
| L19-L30 | 7-29% | 0.05-0.20 | 0% |
|
| 55 |
+
|
| 56 |
+
**Note**: The higher L0 (i.e., lower sparsity, more features active per token) in deeper layers (L19-L30) is expected behavior in transformers, where later layers are more specialized.
|
| 57 |
+
|
| 58 |
+
---
|
| 59 |
+
|
| 60 |
+
## Files
|
| 61 |
+
|
| 62 |
+
Each layer has two files:
|
| 63 |
+
|
| 64 |
+
### 1. `transcoder_L{layer}.pt`
|
| 65 |
+
Contains the trained transcoder model and training metadata.
|
| 66 |
|
| 67 |
```python
|
| 68 |
+
checkpoint = torch.load('transcoder_L5.pt')
|
| 69 |
+
# Keys: 'layer', 'hidden_dim', 'feature_dim', 'state_dict', 'training_metadata', 'mlp_to_clt_mapping'
|
| 70 |
+
```
|
| 71 |
+
|
| 72 |
+
### 2. `mapping_L{layer}.pt`
|
| 73 |
+
Contains MLP→CLT mapping and decoder weights for analysis.
|
| 74 |
|
| 75 |
+
```python
|
| 76 |
+
mapping = torch.load('mapping_L5.pt')
|
| 77 |
+
# Keys: 'layer', 'mlp_to_clt_mapping', 'decoder_weights', 'hidden_dim', 'feature_dim', 'description'
|
| 78 |
+
|
| 79 |
+
# mlp_to_clt_mapping: [4096, 8192] - which MLP neurons correlate with each CLT feature
|
| 80 |
+
# decoder_weights: [4096, 8192] - CLT → MLP reconstruction weights
|
| 81 |
+
```
|
| 82 |
+
|
| 83 |
+
---
|
| 84 |
+
|
| 85 |
+
## Usage
|
| 86 |
|
| 87 |
+
### 1. Load a Transcoder
|
| 88 |
+
|
| 89 |
+
```python
|
| 90 |
+
import torch
|
| 91 |
import torch.nn as nn
|
| 92 |
+
|
| 93 |
class Transcoder(nn.Module):
|
| 94 |
+
def __init__(self, hidden_dim: int, feature_dim: int):
|
| 95 |
super().__init__()
|
| 96 |
self.enc = nn.Sequential(
|
| 97 |
nn.LayerNorm(hidden_dim),
|
| 98 |
nn.Linear(hidden_dim, feature_dim),
|
| 99 |
)
|
| 100 |
self.dec = nn.Linear(feature_dim, hidden_dim)
|
| 101 |
+
|
| 102 |
+
def forward(self, x):
|
| 103 |
z_pre = self.enc(x)
|
| 104 |
z = torch.relu(z_pre)
|
| 105 |
y_hat = self.dec(z)
|
| 106 |
+
return y_hat, z # reconstruction, features
|
| 107 |
+
|
| 108 |
+
# Load Layer 10 transcoder
|
| 109 |
+
checkpoint = torch.load('transcoder_L10.pt', map_location='cpu')
|
| 110 |
+
hidden_dim = checkpoint['hidden_dim']
|
| 111 |
+
feature_dim = checkpoint['feature_dim']
|
| 112 |
+
|
| 113 |
+
transcoder = Transcoder(hidden_dim, feature_dim)
|
| 114 |
+
transcoder.load_state_dict(checkpoint['state_dict'])
|
| 115 |
+
transcoder.eval()
|
| 116 |
+
|
| 117 |
+
# Use with LLaVA MLP outputs
|
| 118 |
+
with torch.no_grad():
|
| 119 |
+
mlp_output = ... # [batch, seq_len, 4096] from LLaVA layer 10
|
| 120 |
+
reconstruction, features = transcoder(mlp_output)
|
| 121 |
+
|
| 122 |
+
# features: [batch, seq_len, 8192] - sparse interpretable features
|
| 123 |
+
# reconstruction: [batch, seq_len, 4096] - reconstructed MLP output
|
| 124 |
```
|
| 125 |
|
| 126 |
+
### 2. Use MLP→CLT Mapping
|
| 127 |
+
|
| 128 |
+
The mapping shows which MLP neurons are correlated with each CLT feature:
|
| 129 |
|
| 130 |
+
```python
|
| 131 |
+
mapping_data = torch.load('mapping_L10.pt', map_location='cpu')
|
| 132 |
+
mlp_to_clt = mapping_data['mlp_to_clt_mapping'] # [4096, 8192]
|
| 133 |
+
|
| 134 |
+
# Find top MLP neurons for a specific CLT feature
|
| 135 |
+
feature_idx = 1234
|
| 136 |
+
top_mlp_neurons = mlp_to_clt[:, feature_idx].topk(k=10)
|
| 137 |
+
print(f"Top MLP neurons for feature {feature_idx}: {top_mlp_neurons.indices}")
|
| 138 |
+
|
| 139 |
+
# Find top CLT features for a specific MLP neuron
|
| 140 |
+
mlp_neuron_idx = 567
|
| 141 |
+
top_clt_features = mlp_to_clt[mlp_neuron_idx, :].topk(k=10)
|
| 142 |
+
print(f"Top CLT features for MLP neuron {mlp_neuron_idx}: {top_clt_features.indices}")
|
| 143 |
```
|
| 144 |
+
|
| 145 |
+
### 3. Replacement Model (Full Integration)
|
| 146 |
+
|
| 147 |
+
For direct integration into LLaVA (replace MLPs with CLTs):
|
| 148 |
+
|
| 149 |
+
```python
|
| 150 |
+
from transformers import LlavaForConditionalGeneration
|
| 151 |
+
|
| 152 |
+
# Load LLaVA
|
| 153 |
+
model = LlavaForConditionalGeneration.from_pretrained(
|
| 154 |
+
"llava-hf/llava-1.5-7b-hf",
|
| 155 |
+
torch_dtype=torch.bfloat16,
|
| 156 |
+
device_map="auto"
|
| 157 |
+
)
|
| 158 |
+
|
| 159 |
+
# Replace MLP in layer 10 with CLT (example)
|
| 160 |
+
layer_idx = 10
|
| 161 |
+
checkpoint = torch.load(f'transcoder_L{layer_idx}.pt')
|
| 162 |
+
transcoder = Transcoder(checkpoint['hidden_dim'], checkpoint['feature_dim'])
|
| 163 |
+
transcoder.load_state_dict(checkpoint['state_dict'])
|
| 164 |
+
|
| 165 |
+
# Hook to replace MLP forward pass
|
| 166 |
+
def replace_mlp_with_clt(module, input, output):
|
| 167 |
+
hidden_state = input[0]
|
| 168 |
+
reconstruction, features = transcoder(hidden_state)
|
| 169 |
+
return reconstruction
|
| 170 |
+
|
| 171 |
+
model.model.layers[layer_idx].mlp.register_forward_hook(replace_mlp_with_clt)
|
| 172 |
```
|
| 173 |
|
| 174 |
+
---
|
| 175 |
+
|
| 176 |
+
## Applications
|
| 177 |
|
| 178 |
+
### 1. **Feature Attribution**
|
| 179 |
+
Identify which features contribute to specific model outputs (hallucination detection, sycophancy analysis).
|
|
|
|
|
|
|
|
|
|
| 180 |
|
| 181 |
+
### 2. **Feature Steering**
|
| 182 |
+
Amplify or suppress specific features at inference time to modify model behavior (reduce hallucinations, improve grounding).
|
| 183 |
|
| 184 |
+
### 3. **Mechanistic Interpretability**
|
| 185 |
+
Build attribution graphs showing causal relationships between features and outputs.
|
|
|
|
| 186 |
|
| 187 |
+
### 4. **Circuit Discovery**
|
| 188 |
+
Map feature interactions across layers to understand how the model processes multimodal information.
|
| 189 |
|
| 190 |
+
---
|
| 191 |
+
|
| 192 |
+
## Related Work
|
| 193 |
+
|
| 194 |
+
This work extends Anthropic's Circuit-Tracer methodology to multimodal vision-language models:
|
| 195 |
+
- [Circuit-Tracer Paper](https://transformer-circuits.pub/2025/attribution-graphs/methods.html)
|
| 196 |
+
- [Sparse Autoencoders for Interpretability](https://transformer-circuits.pub/2023/monosemantic-features/index.html)
|
| 197 |
+
|
| 198 |
+
---
|
| 199 |
+
|
| 200 |
+
## Citation
|
| 201 |
+
|
| 202 |
+
If you use these transcoders in your research, please cite:
|
| 203 |
+
|
| 204 |
+
```bibtex
|
| 205 |
+
@misc{llava15_clts_2025,
|
| 206 |
+
title={Cross-Layer Transcoders for LLaVA-1.5-7B},
|
| 207 |
+
author={Koko's Dev},
|
| 208 |
+
year={2025},
|
| 209 |
+
publisher={HuggingFace Hub},
|
| 210 |
+
howpublished={\url{https://huggingface.co/KokosDev/llava15-7b-clt}}
|
| 211 |
+
}
|
| 212 |
+
```
|
| 213 |
+
|
| 214 |
+
---
|
| 215 |
|
| 216 |
## License
|
| 217 |
|
| 218 |
+
These transcoders are released under the same license as the base model (Apache 2.0). The base LLaVA-1.5-7B model is from [llava-hf/llava-1.5-7b-hf](https://huggingface.co/llava-hf/llava-1.5-7b-hf).
|
| 219 |
+
|
| 220 |
+
---
|
| 221 |
+
|
| 222 |
+
## Acknowledgments
|
| 223 |
|
| 224 |
+
- **Base Model**: [LLaVA-1.5-7B](https://huggingface.co/llava-hf/llava-1.5-7b-hf)
|
| 225 |
+
- **Methodology**: Inspired by Anthropic's Circuit-Tracer and sparse autoencoder research
|
| 226 |
+
- **Training Data**: Flickr30K, instruction-following datasets
|
| 227 |
|
config.json
DELETED
|
@@ -1,40 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"base_model": "llava-hf/llava-1.5-7b-hf",
|
| 3 |
-
"mode": "CLT",
|
| 4 |
-
"num_layers": 31,
|
| 5 |
-
"layers": [
|
| 6 |
-
0,
|
| 7 |
-
1,
|
| 8 |
-
2,
|
| 9 |
-
3,
|
| 10 |
-
4,
|
| 11 |
-
5,
|
| 12 |
-
6,
|
| 13 |
-
7,
|
| 14 |
-
8,
|
| 15 |
-
9,
|
| 16 |
-
10,
|
| 17 |
-
11,
|
| 18 |
-
12,
|
| 19 |
-
13,
|
| 20 |
-
14,
|
| 21 |
-
15,
|
| 22 |
-
16,
|
| 23 |
-
17,
|
| 24 |
-
18,
|
| 25 |
-
19,
|
| 26 |
-
20,
|
| 27 |
-
21,
|
| 28 |
-
22,
|
| 29 |
-
23,
|
| 30 |
-
24,
|
| 31 |
-
25,
|
| 32 |
-
26,
|
| 33 |
-
27,
|
| 34 |
-
28,
|
| 35 |
-
29,
|
| 36 |
-
30
|
| 37 |
-
],
|
| 38 |
-
"hidden_dim": 4096,
|
| 39 |
-
"feature_dim": 8192
|
| 40 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
transcoder_L0.safetensors → mapping_L0.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:926764d522494fab99349e5047f7826bf947976ad9738558a2d43fcf31f24642
|
| 3 |
+
size 201328637
|
transcoder_L1.safetensors → mapping_L1.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ac28c58669174849b7cea5b1e74f0888fb31fecd15a2f5ee6c5a90e19ada295c
|
| 3 |
+
size 201328637
|
transcoder_L10.safetensors → mapping_L10.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:49d109136f4ad7d98409b455573405e0735aaeb8154e983f0cca7465a07bd699
|
| 3 |
+
size 201328645
|
transcoder_L11.safetensors → mapping_L11.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7948d7131b865188c4853cf2313260788e5e29afd1119c5a5da3820a0cba8d7c
|
| 3 |
+
size 201328645
|
mapping_L12.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:001461dbd47fde64883a7795080898124517902f65669d7b200618f38229cb63
|
| 3 |
+
size 201328645
|
mapping_L13.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e51581cbd3921b0028a1f83bcaaae48d7816d29adc93a44092349425fb7f2d45
|
| 3 |
+
size 201328645
|
mapping_L14.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d01550fd3f7c7f755709ab41cdbc5cd2d7cd71bfaf08d50d7a8891d29a7e9e55
|
| 3 |
+
size 201328645
|
mapping_L15.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0b47dc82af9670279792da11774ab38e5c0e3d2438cfc7a0ac368b5333ae7028
|
| 3 |
+
size 201328645
|
mapping_L16.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:df805b6e3010a3157bb461936f5b1c78c62a26f288aca5cba5494d146dbb66d6
|
| 3 |
+
size 201328645
|
mapping_L17.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7ac626f96b0b5c274c8cef6c76616879fb1556e417153419d164bd2805502d3f
|
| 3 |
+
size 201328645
|
mapping_L18.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8e026f2608e3ba31b8ee7e087112b1687526e42b21efa132030c96c25b1f3304
|
| 3 |
+
size 201328645
|
mapping_L19.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fcd373d9432a95e50384ecc54135eabedf289b0480334dbbb3575ac46221f4a1
|
| 3 |
+
size 201328645
|
mapping_L2.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:04e4e92c46fcd8fbfe3411d8905a82af869701c4348d5c95e0e253c7dec9b6fd
|
| 3 |
+
size 201328637
|
mapping_L20.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:630ad511f3fe8a53eff06bc385277726966b4303d192451748bb8e1f83505b9a
|
| 3 |
+
size 201328645
|
mapping_L21.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8c2565eafc65c19cfe919b40d425d541b0c6e3442cef7838e9ee288b70229068
|
| 3 |
+
size 201328645
|
mapping_L22.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4ab93c3c8ad2b952174eaf2556f8390857955c2a7c62b0b43e4c15550d2cf924
|
| 3 |
+
size 201328645
|
mapping_L23.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dd0d8568442be298b1013327692d59428ef70a87c594a142d0de1b3da61b649d
|
| 3 |
+
size 201328645
|
mapping_L24.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:40e1550373734e47dee1de1dd10bc7777016be3d3a83982badff7b04c72c67ee
|
| 3 |
+
size 201328645
|
mapping_L25.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7500aa2a3757114f460c1274774856e420aca82fd9919481ffe57890cbc068b5
|
| 3 |
+
size 201328645
|
mapping_L26.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:24f3f80e2e289eda2a424b43c41e3137b190a8bf43672515285cc1d9b835e02f
|
| 3 |
+
size 201328645
|
mapping_L27.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e17d03cf4b72e617c34f45ed34783b10cc1f8929c03b32e64f512458d13df7b9
|
| 3 |
+
size 201328645
|
mapping_L28.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3299928c35caa7afa9980427d54c0b8feda5aa2df4bd2cd2b81a1b6a7e977291
|
| 3 |
+
size 201328645
|
mapping_L29.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5dd5a230f203706f0bdfdc92ee53fa93ab295c66847d8cc1d328a41c37ba1438
|
| 3 |
+
size 201328645
|
mapping_L3.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a807b4bfb7598cf86dafd7a4f50452bb5709db763562dc5609a6ad44e737f58b
|
| 3 |
+
size 201328637
|
mapping_L30.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e7161153190b60857244937449ca376718b2d993a62e4c32c35b7702d832869f
|
| 3 |
+
size 201328645
|
mapping_L4.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0871af791813ec4c33ffa6ae3b1df89c042277887f4653d4d7cc96f4544f088b
|
| 3 |
+
size 201328637
|
mapping_L5.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0e9bf0d2d4d8d8ab2ba2ec346403f3ca36fcfcc774c58f0d3c7300a3f0fb6abf
|
| 3 |
+
size 201328637
|
mapping_L6.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5b6015468b1ac57bf8e206aabb76d8d2db229a8a155861782d7a03e8b49c1bf4
|
| 3 |
+
size 201328637
|
mapping_L7.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e269cb4328bc01c7402a4024ea2fdd04652c5ee714346268a6168f366fc74e15
|
| 3 |
+
size 201328637
|
mapping_L8.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bb2958b47d295bfd367206933273952174df3432a5f622cf90c07be8b72a5986
|
| 3 |
+
size 201328637
|
mapping_L9.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8876128aace266d31987461c15548a2d5c770f3ec0d4ebda28ce27ef89eb25b5
|
| 3 |
+
size 201328637
|
metrics/metrics_L0.json
DELETED
|
@@ -1,185 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"layer": 0,
|
| 3 |
-
"metrics": [
|
| 4 |
-
{
|
| 5 |
-
"step": 200,
|
| 6 |
-
"train_loss": 0.0003072486724704504,
|
| 7 |
-
"val_loss": 0.0003067493438720703,
|
| 8 |
-
"train_l0": 0.47220587730407715,
|
| 9 |
-
"val_l0": 0.3172755241394043
|
| 10 |
-
},
|
| 11 |
-
{
|
| 12 |
-
"step": 400,
|
| 13 |
-
"train_loss": 0.00025499684852547944,
|
| 14 |
-
"val_loss": 0.00026645660400390623,
|
| 15 |
-
"train_l0": 0.3529787063598633,
|
| 16 |
-
"val_l0": 0.3875422477722168
|
| 17 |
-
},
|
| 18 |
-
{
|
| 19 |
-
"step": 600,
|
| 20 |
-
"train_loss": 0.00025995602481998503,
|
| 21 |
-
"val_loss": 0.00023263096809387208,
|
| 22 |
-
"train_l0": 0.32089948654174805,
|
| 23 |
-
"val_l0": 0.34657716751098633
|
| 24 |
-
},
|
| 25 |
-
{
|
| 26 |
-
"step": 800,
|
| 27 |
-
"train_loss": 0.00024188542738556862,
|
| 28 |
-
"val_loss": 0.00024361610412597655,
|
| 29 |
-
"train_l0": 0.2146601676940918,
|
| 30 |
-
"val_l0": 0.24510860443115234
|
| 31 |
-
},
|
| 32 |
-
{
|
| 33 |
-
"step": 1000,
|
| 34 |
-
"train_loss": 0.0002084258885588497,
|
| 35 |
-
"val_loss": 0.0002314150333404541,
|
| 36 |
-
"train_l0": 0.2898752689361572,
|
| 37 |
-
"val_l0": 0.22312402725219727
|
| 38 |
-
},
|
| 39 |
-
{
|
| 40 |
-
"step": 1200,
|
| 41 |
-
"train_loss": 0.00025186064885929227,
|
| 42 |
-
"val_loss": 0.00021649599075317382,
|
| 43 |
-
"train_l0": 0.22274255752563477,
|
| 44 |
-
"val_l0": 0.22777795791625977
|
| 45 |
-
},
|
| 46 |
-
{
|
| 47 |
-
"step": 1400,
|
| 48 |
-
"train_loss": 0.00023313461861107498,
|
| 49 |
-
"val_loss": 0.000215911865234375,
|
| 50 |
-
"train_l0": 0.21011829376220703,
|
| 51 |
-
"val_l0": 0.22365093231201172
|
| 52 |
-
},
|
| 53 |
-
{
|
| 54 |
-
"step": 1600,
|
| 55 |
-
"train_loss": 0.0002374562027398497,
|
| 56 |
-
"val_loss": 0.00021054744720458985,
|
| 57 |
-
"train_l0": 0.1807570457458496,
|
| 58 |
-
"val_l0": 0.2297806739807129
|
| 59 |
-
},
|
| 60 |
-
{
|
| 61 |
-
"step": 1800,
|
| 62 |
-
"train_loss": 0.00022862674086354673,
|
| 63 |
-
"val_loss": 0.00022011399269104004,
|
| 64 |
-
"train_l0": 0.19682049751281738,
|
| 65 |
-
"val_l0": 0.1999950408935547
|
| 66 |
-
},
|
| 67 |
-
{
|
| 68 |
-
"step": 2000,
|
| 69 |
-
"train_loss": 0.00020527455490082502,
|
| 70 |
-
"val_loss": 0.0001972675323486328,
|
| 71 |
-
"train_l0": 0.22835731506347656,
|
| 72 |
-
"val_l0": 0.26402711868286133
|
| 73 |
-
},
|
| 74 |
-
{
|
| 75 |
-
"step": 2200,
|
| 76 |
-
"train_loss": 0.00020355929154902697,
|
| 77 |
-
"val_loss": 0.00022192001342773436,
|
| 78 |
-
"train_l0": 0.229644775390625,
|
| 79 |
-
"val_l0": 0.2043318748474121
|
| 80 |
-
},
|
| 81 |
-
{
|
| 82 |
-
"step": 2400,
|
| 83 |
-
"train_loss": 0.00021205551456660032,
|
| 84 |
-
"val_loss": 0.0002220630645751953,
|
| 85 |
-
"train_l0": 0.2176821231842041,
|
| 86 |
-
"val_l0": 0.22295713424682617
|
| 87 |
-
},
|
| 88 |
-
{
|
| 89 |
-
"step": 2600,
|
| 90 |
-
"train_loss": 0.00021079782163724303,
|
| 91 |
-
"val_loss": 0.00021343231201171876,
|
| 92 |
-
"train_l0": 0.21379590034484863,
|
| 93 |
-
"val_l0": 0.21446943283081055
|
| 94 |
-
},
|
| 95 |
-
{
|
| 96 |
-
"step": 2800,
|
| 97 |
-
"train_loss": 0.00022800829901825637,
|
| 98 |
-
"val_loss": 0.00023617744445800782,
|
| 99 |
-
"train_l0": 0.1966416835784912,
|
| 100 |
-
"val_l0": 0.19351720809936523
|
| 101 |
-
},
|
| 102 |
-
{
|
| 103 |
-
"step": 3000,
|
| 104 |
-
"train_loss": 0.00020191156363580376,
|
| 105 |
-
"val_loss": 0.00021142959594726561,
|
| 106 |
-
"train_l0": 0.2100825309753418,
|
| 107 |
-
"val_l0": 0.21882295608520508
|
| 108 |
-
},
|
| 109 |
-
{
|
| 110 |
-
"step": 3200,
|
| 111 |
-
"train_loss": 0.00021879124687984586,
|
| 112 |
-
"val_loss": 0.00023055076599121094,
|
| 113 |
-
"train_l0": 0.22339820861816406,
|
| 114 |
-
"val_l0": 0.20228862762451172
|
| 115 |
-
},
|
| 116 |
-
{
|
| 117 |
-
"step": 3400,
|
| 118 |
-
"train_loss": 0.0001810171379474923,
|
| 119 |
-
"val_loss": 0.00021448135375976563,
|
| 120 |
-
"train_l0": 0.23145079612731934,
|
| 121 |
-
"val_l0": 0.23223161697387695
|
| 122 |
-
},
|
| 123 |
-
{
|
| 124 |
-
"step": 3600,
|
| 125 |
-
"train_loss": 0.00018641870701685548,
|
| 126 |
-
"val_loss": 0.0002062082290649414,
|
| 127 |
-
"train_l0": 0.21660923957824707,
|
| 128 |
-
"val_l0": 0.22423267364501953
|
| 129 |
-
},
|
| 130 |
-
{
|
| 131 |
-
"step": 3800,
|
| 132 |
-
"train_loss": 0.00017806273535825312,
|
| 133 |
-
"val_loss": 0.00022385120391845703,
|
| 134 |
-
"train_l0": 0.1947641372680664,
|
| 135 |
-
"val_l0": 0.18842220306396484
|
| 136 |
-
},
|
| 137 |
-
{
|
| 138 |
-
"step": 4000,
|
| 139 |
-
"train_loss": 0.00017908113659359515,
|
| 140 |
-
"val_loss": 0.0002302885055541992,
|
| 141 |
-
"train_l0": 0.21029114723205566,
|
| 142 |
-
"val_l0": 0.18979549407958984
|
| 143 |
-
},
|
| 144 |
-
{
|
| 145 |
-
"step": 4200,
|
| 146 |
-
"train_loss": 0.0002036196819972247,
|
| 147 |
-
"val_loss": 0.00022821426391601564,
|
| 148 |
-
"train_l0": 0.19829273223876953,
|
| 149 |
-
"val_l0": 0.1981973648071289
|
| 150 |
-
},
|
| 151 |
-
{
|
| 152 |
-
"step": 4400,
|
| 153 |
-
"train_loss": 0.0002189793303841725,
|
| 154 |
-
"val_loss": 0.00019947290420532226,
|
| 155 |
-
"train_l0": 0.19347071647644043,
|
| 156 |
-
"val_l0": 0.19977569580078125
|
| 157 |
-
},
|
| 158 |
-
{
|
| 159 |
-
"step": 4600,
|
| 160 |
-
"train_loss": 0.00017246737843379378,
|
| 161 |
-
"val_loss": 0.00020782947540283204,
|
| 162 |
-
"train_l0": 0.2507627010345459,
|
| 163 |
-
"val_l0": 0.21579265594482422
|
| 164 |
-
},
|
| 165 |
-
{
|
| 166 |
-
"step": 4800,
|
| 167 |
-
"train_loss": 0.00017646313062869012,
|
| 168 |
-
"val_loss": 0.0002214670181274414,
|
| 169 |
-
"train_l0": 0.23015737533569336,
|
| 170 |
-
"val_l0": 0.21255731582641602
|
| 171 |
-
},
|
| 172 |
-
{
|
| 173 |
-
"step": 5000,
|
| 174 |
-
"train_loss": 0.00021874290541745722,
|
| 175 |
-
"val_loss": 0.00021724700927734376,
|
| 176 |
-
"train_l0": 0.2058267593383789,
|
| 177 |
-
"val_l0": 0.20237207412719727
|
| 178 |
-
}
|
| 179 |
-
],
|
| 180 |
-
"summary": {
|
| 181 |
-
"dead_features": 0,
|
| 182 |
-
"dead_pct": 0.0,
|
| 183 |
-
"final_l0_pct": 0.2058267593383789
|
| 184 |
-
}
|
| 185 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
metrics/metrics_L1.json
DELETED
|
@@ -1,185 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"layer": 1,
|
| 3 |
-
"metrics": [
|
| 4 |
-
{
|
| 5 |
-
"step": 200,
|
| 6 |
-
"train_loss": 0.9419434070587158,
|
| 7 |
-
"val_loss": 0.693658447265625,
|
| 8 |
-
"train_l0": 10.727488994598389,
|
| 9 |
-
"val_l0": 10.474152565002441
|
| 10 |
-
},
|
| 11 |
-
{
|
| 12 |
-
"step": 400,
|
| 13 |
-
"train_loss": 0.7538449764251709,
|
| 14 |
-
"val_loss": 0.53499755859375,
|
| 15 |
-
"train_l0": 10.236090421676636,
|
| 16 |
-
"val_l0": 10.744032859802246
|
| 17 |
-
},
|
| 18 |
-
{
|
| 19 |
-
"step": 600,
|
| 20 |
-
"train_loss": 0.9511469602584839,
|
| 21 |
-
"val_loss": 0.40771331787109377,
|
| 22 |
-
"train_l0": 9.165537357330322,
|
| 23 |
-
"val_l0": 10.436451435089111
|
| 24 |
-
},
|
| 25 |
-
{
|
| 26 |
-
"step": 800,
|
| 27 |
-
"train_loss": 0.5759391188621521,
|
| 28 |
-
"val_loss": 1.3465713500976562,
|
| 29 |
-
"train_l0": 10.276931524276733,
|
| 30 |
-
"val_l0": 8.959136009216309
|
| 31 |
-
},
|
| 32 |
-
{
|
| 33 |
-
"step": 1000,
|
| 34 |
-
"train_loss": 1.637937068939209,
|
| 35 |
-
"val_loss": 1.3444091796875,
|
| 36 |
-
"train_l0": 7.751274108886719,
|
| 37 |
-
"val_l0": 8.770787715911865
|
| 38 |
-
},
|
| 39 |
-
{
|
| 40 |
-
"step": 1200,
|
| 41 |
-
"train_loss": 0.6342856884002686,
|
| 42 |
-
"val_loss": 0.36365509033203125,
|
| 43 |
-
"train_l0": 8.721637725830078,
|
| 44 |
-
"val_l0": 9.692184925079346
|
| 45 |
-
},
|
| 46 |
-
{
|
| 47 |
-
"step": 1400,
|
| 48 |
-
"train_loss": 0.6578925848007202,
|
| 49 |
-
"val_loss": 0.6135894775390625,
|
| 50 |
-
"train_l0": 9.744536876678467,
|
| 51 |
-
"val_l0": 9.652924537658691
|
| 52 |
-
},
|
| 53 |
-
{
|
| 54 |
-
"step": 1600,
|
| 55 |
-
"train_loss": 0.8243253827095032,
|
| 56 |
-
"val_loss": 1.1909713745117188,
|
| 57 |
-
"train_l0": 9.919911623001099,
|
| 58 |
-
"val_l0": 9.420511722564697
|
| 59 |
-
},
|
| 60 |
-
{
|
| 61 |
-
"step": 1800,
|
| 62 |
-
"train_loss": 0.3240302503108978,
|
| 63 |
-
"val_loss": 0.5864349365234375,
|
| 64 |
-
"train_l0": 10.426890850067139,
|
| 65 |
-
"val_l0": 9.911370277404785
|
| 66 |
-
},
|
| 67 |
-
{
|
| 68 |
-
"step": 2000,
|
| 69 |
-
"train_loss": 1.614189624786377,
|
| 70 |
-
"val_loss": 0.5590194702148438,
|
| 71 |
-
"train_l0": 8.73565673828125,
|
| 72 |
-
"val_l0": 9.86403226852417
|
| 73 |
-
},
|
| 74 |
-
{
|
| 75 |
-
"step": 2200,
|
| 76 |
-
"train_loss": 0.8542783856391907,
|
| 77 |
-
"val_loss": 1.0432174682617188,
|
| 78 |
-
"train_l0": 9.816688299179077,
|
| 79 |
-
"val_l0": 9.802188873291016
|
| 80 |
-
},
|
| 81 |
-
{
|
| 82 |
-
"step": 2400,
|
| 83 |
-
"train_loss": 1.3850717544555664,
|
| 84 |
-
"val_loss": 1.02093505859375,
|
| 85 |
-
"train_l0": 9.431636333465576,
|
| 86 |
-
"val_l0": 9.918689727783203
|
| 87 |
-
},
|
| 88 |
-
{
|
| 89 |
-
"step": 2600,
|
| 90 |
-
"train_loss": 1.3293514251708984,
|
| 91 |
-
"val_loss": 1.1007492065429687,
|
| 92 |
-
"train_l0": 9.813636541366577,
|
| 93 |
-
"val_l0": 10.1796293258667
|
| 94 |
-
},
|
| 95 |
-
{
|
| 96 |
-
"step": 2800,
|
| 97 |
-
"train_loss": 0.8528616428375244,
|
| 98 |
-
"val_loss": 1.0055511474609375,
|
| 99 |
-
"train_l0": 10.77679991722107,
|
| 100 |
-
"val_l0": 9.498050212860107
|
| 101 |
-
},
|
| 102 |
-
{
|
| 103 |
-
"step": 3000,
|
| 104 |
-
"train_loss": 1.6519041061401367,
|
| 105 |
-
"val_loss": 0.6967056274414063,
|
| 106 |
-
"train_l0": 9.290111064910889,
|
| 107 |
-
"val_l0": 10.083272457122803
|
| 108 |
-
},
|
| 109 |
-
{
|
| 110 |
-
"step": 3200,
|
| 111 |
-
"train_loss": 1.3391976356506348,
|
| 112 |
-
"val_loss": 1.432598876953125,
|
| 113 |
-
"train_l0": 9.429383277893066,
|
| 114 |
-
"val_l0": 8.872983455657959
|
| 115 |
-
},
|
| 116 |
-
{
|
| 117 |
-
"step": 3400,
|
| 118 |
-
"train_loss": 1.6542689800262451,
|
| 119 |
-
"val_loss": 0.4710418701171875,
|
| 120 |
-
"train_l0": 9.355288743972778,
|
| 121 |
-
"val_l0": 10.494527816772461
|
| 122 |
-
},
|
| 123 |
-
{
|
| 124 |
-
"step": 3600,
|
| 125 |
-
"train_loss": 1.365459680557251,
|
| 126 |
-
"val_loss": 1.354937744140625,
|
| 127 |
-
"train_l0": 9.697532653808594,
|
| 128 |
-
"val_l0": 9.892845153808594
|
| 129 |
-
},
|
| 130 |
-
{
|
| 131 |
-
"step": 3800,
|
| 132 |
-
"train_loss": 1.3053505420684814,
|
| 133 |
-
"val_loss": 0.9811141967773438,
|
| 134 |
-
"train_l0": 8.926576375961304,
|
| 135 |
-
"val_l0": 9.051361083984375
|
| 136 |
-
},
|
| 137 |
-
{
|
| 138 |
-
"step": 4000,
|
| 139 |
-
"train_loss": 0.5850197672843933,
|
| 140 |
-
"val_loss": 1.3319610595703124,
|
| 141 |
-
"train_l0": 9.88953709602356,
|
| 142 |
-
"val_l0": 9.219849109649658
|
| 143 |
-
},
|
| 144 |
-
{
|
| 145 |
-
"step": 4200,
|
| 146 |
-
"train_loss": 0.31436559557914734,
|
| 147 |
-
"val_loss": 0.7597930908203125,
|
| 148 |
-
"train_l0": 9.815418720245361,
|
| 149 |
-
"val_l0": 9.851465225219727
|
| 150 |
-
},
|
| 151 |
-
{
|
| 152 |
-
"step": 4400,
|
| 153 |
-
"train_loss": 1.077832579612732,
|
| 154 |
-
"val_loss": 1.079656982421875,
|
| 155 |
-
"train_l0": 9.930789470672607,
|
| 156 |
-
"val_l0": 10.260002613067627
|
| 157 |
-
},
|
| 158 |
-
{
|
| 159 |
-
"step": 4600,
|
| 160 |
-
"train_loss": 0.5662795305252075,
|
| 161 |
-
"val_loss": 0.9822235107421875,
|
| 162 |
-
"train_l0": 10.273551940917969,
|
| 163 |
-
"val_l0": 9.154860973358154
|
| 164 |
-
},
|
| 165 |
-
{
|
| 166 |
-
"step": 4800,
|
| 167 |
-
"train_loss": 1.9361178874969482,
|
| 168 |
-
"val_loss": 0.596685791015625,
|
| 169 |
-
"train_l0": 9.330040216445923,
|
| 170 |
-
"val_l0": 11.255741119384766
|
| 171 |
-
},
|
| 172 |
-
{
|
| 173 |
-
"step": 5000,
|
| 174 |
-
"train_loss": 0.0452130101621151,
|
| 175 |
-
"val_loss": 0.8519371032714844,
|
| 176 |
-
"train_l0": 11.875146627426147,
|
| 177 |
-
"val_l0": 9.815096855163574
|
| 178 |
-
}
|
| 179 |
-
],
|
| 180 |
-
"summary": {
|
| 181 |
-
"dead_features": 0,
|
| 182 |
-
"dead_pct": 0.0,
|
| 183 |
-
"final_l0_pct": 11.875146627426147
|
| 184 |
-
}
|
| 185 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
metrics/metrics_L10.json
DELETED
|
@@ -1,185 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"layer": 10,
|
| 3 |
-
"metrics": [
|
| 4 |
-
{
|
| 5 |
-
"step": 200,
|
| 6 |
-
"train_loss": 0.25114452838897705,
|
| 7 |
-
"val_loss": 0.30525054931640627,
|
| 8 |
-
"train_l0": 5.801188945770264,
|
| 9 |
-
"val_l0": 5.450534820556641
|
| 10 |
-
},
|
| 11 |
-
{
|
| 12 |
-
"step": 400,
|
| 13 |
-
"train_loss": 0.11518851667642593,
|
| 14 |
-
"val_loss": 0.2629669189453125,
|
| 15 |
-
"train_l0": 5.111277103424072,
|
| 16 |
-
"val_l0": 5.683934688568115
|
| 17 |
-
},
|
| 18 |
-
{
|
| 19 |
-
"step": 600,
|
| 20 |
-
"train_loss": 0.16822582483291626,
|
| 21 |
-
"val_loss": 0.10852508544921875,
|
| 22 |
-
"train_l0": 3.7683725357055664,
|
| 23 |
-
"val_l0": 4.535846710205078
|
| 24 |
-
},
|
| 25 |
-
{
|
| 26 |
-
"step": 800,
|
| 27 |
-
"train_loss": 0.13124150037765503,
|
| 28 |
-
"val_loss": 0.3328338623046875,
|
| 29 |
-
"train_l0": 3.5957694053649902,
|
| 30 |
-
"val_l0": 3.9565396308898926
|
| 31 |
-
},
|
| 32 |
-
{
|
| 33 |
-
"step": 1000,
|
| 34 |
-
"train_loss": 0.046043723821640015,
|
| 35 |
-
"val_loss": 0.06258087158203125,
|
| 36 |
-
"train_l0": 4.6050965785980225,
|
| 37 |
-
"val_l0": 4.226646423339844
|
| 38 |
-
},
|
| 39 |
-
{
|
| 40 |
-
"step": 1200,
|
| 41 |
-
"train_loss": 0.25528469681739807,
|
| 42 |
-
"val_loss": 0.13884201049804687,
|
| 43 |
-
"train_l0": 3.949427604675293,
|
| 44 |
-
"val_l0": 3.839094638824463
|
| 45 |
-
},
|
| 46 |
-
{
|
| 47 |
-
"step": 1400,
|
| 48 |
-
"train_loss": 0.276385635137558,
|
| 49 |
-
"val_loss": 0.20626220703125,
|
| 50 |
-
"train_l0": 3.6357343196868896,
|
| 51 |
-
"val_l0": 4.6518754959106445
|
| 52 |
-
},
|
| 53 |
-
{
|
| 54 |
-
"step": 1600,
|
| 55 |
-
"train_loss": 0.12123381346464157,
|
| 56 |
-
"val_loss": 0.156231689453125,
|
| 57 |
-
"train_l0": 3.2030463218688965,
|
| 58 |
-
"val_l0": 4.437422752380371
|
| 59 |
-
},
|
| 60 |
-
{
|
| 61 |
-
"step": 1800,
|
| 62 |
-
"train_loss": 0.2816537022590637,
|
| 63 |
-
"val_loss": 0.11364669799804687,
|
| 64 |
-
"train_l0": 4.115825891494751,
|
| 65 |
-
"val_l0": 3.6349129676818848
|
| 66 |
-
},
|
| 67 |
-
{
|
| 68 |
-
"step": 2000,
|
| 69 |
-
"train_loss": 0.11043612658977509,
|
| 70 |
-
"val_loss": 0.16072845458984375,
|
| 71 |
-
"train_l0": 3.5869598388671875,
|
| 72 |
-
"val_l0": 3.8583016395568848
|
| 73 |
-
},
|
| 74 |
-
{
|
| 75 |
-
"step": 2200,
|
| 76 |
-
"train_loss": 0.0763341560959816,
|
| 77 |
-
"val_loss": 0.1752197265625,
|
| 78 |
-
"train_l0": 3.29434871673584,
|
| 79 |
-
"val_l0": 3.7455320358276367
|
| 80 |
-
},
|
| 81 |
-
{
|
| 82 |
-
"step": 2400,
|
| 83 |
-
"train_loss": 0.10305751115083694,
|
| 84 |
-
"val_loss": 0.15592117309570314,
|
| 85 |
-
"train_l0": 3.9294421672821045,
|
| 86 |
-
"val_l0": 3.3687186241149902
|
| 87 |
-
},
|
| 88 |
-
{
|
| 89 |
-
"step": 2600,
|
| 90 |
-
"train_loss": 0.0026209885254502296,
|
| 91 |
-
"val_loss": 0.22261276245117187,
|
| 92 |
-
"train_l0": 2.8446197509765625,
|
| 93 |
-
"val_l0": 3.688523769378662
|
| 94 |
-
},
|
| 95 |
-
{
|
| 96 |
-
"step": 2800,
|
| 97 |
-
"train_loss": 0.0903225839138031,
|
| 98 |
-
"val_loss": 0.11019287109375,
|
| 99 |
-
"train_l0": 3.213447332382202,
|
| 100 |
-
"val_l0": 3.316357135772705
|
| 101 |
-
},
|
| 102 |
-
{
|
| 103 |
-
"step": 3000,
|
| 104 |
-
"train_loss": 0.11671537905931473,
|
| 105 |
-
"val_loss": 0.1362518310546875,
|
| 106 |
-
"train_l0": 3.646618127822876,
|
| 107 |
-
"val_l0": 3.2197976112365723
|
| 108 |
-
},
|
| 109 |
-
{
|
| 110 |
-
"step": 3200,
|
| 111 |
-
"train_loss": 0.1287200003862381,
|
| 112 |
-
"val_loss": 0.17482376098632812,
|
| 113 |
-
"train_l0": 3.5375654697418213,
|
| 114 |
-
"val_l0": 3.8109779357910156
|
| 115 |
-
},
|
| 116 |
-
{
|
| 117 |
-
"step": 3400,
|
| 118 |
-
"train_loss": 0.20451630651950836,
|
| 119 |
-
"val_loss": 0.16786842346191405,
|
| 120 |
-
"train_l0": 2.8381645679473877,
|
| 121 |
-
"val_l0": 3.196878433227539
|
| 122 |
-
},
|
| 123 |
-
{
|
| 124 |
-
"step": 3600,
|
| 125 |
-
"train_loss": 0.07456573843955994,
|
| 126 |
-
"val_loss": 0.11831588745117187,
|
| 127 |
-
"train_l0": 3.2184183597564697,
|
| 128 |
-
"val_l0": 3.5810303688049316
|
| 129 |
-
},
|
| 130 |
-
{
|
| 131 |
-
"step": 3800,
|
| 132 |
-
"train_loss": 0.22658288478851318,
|
| 133 |
-
"val_loss": 0.1669994354248047,
|
| 134 |
-
"train_l0": 3.7450194358825684,
|
| 135 |
-
"val_l0": 3.519585132598877
|
| 136 |
-
},
|
| 137 |
-
{
|
| 138 |
-
"step": 4000,
|
| 139 |
-
"train_loss": 0.07381106913089752,
|
| 140 |
-
"val_loss": 0.15676345825195312,
|
| 141 |
-
"train_l0": 3.4190475940704346,
|
| 142 |
-
"val_l0": 2.9603052139282227
|
| 143 |
-
},
|
| 144 |
-
{
|
| 145 |
-
"step": 4200,
|
| 146 |
-
"train_loss": 0.1808185875415802,
|
| 147 |
-
"val_loss": 0.14236946105957032,
|
| 148 |
-
"train_l0": 3.0088305473327637,
|
| 149 |
-
"val_l0": 3.057527542114258
|
| 150 |
-
},
|
| 151 |
-
{
|
| 152 |
-
"step": 4400,
|
| 153 |
-
"train_loss": 0.0016893134452402592,
|
| 154 |
-
"val_loss": 0.09865303039550781,
|
| 155 |
-
"train_l0": 2.7297258377075195,
|
| 156 |
-
"val_l0": 3.4493255615234375
|
| 157 |
-
},
|
| 158 |
-
{
|
| 159 |
-
"step": 4600,
|
| 160 |
-
"train_loss": 0.10395370423793793,
|
| 161 |
-
"val_loss": 0.1289844512939453,
|
| 162 |
-
"train_l0": 3.6900579929351807,
|
| 163 |
-
"val_l0": 3.1922101974487305
|
| 164 |
-
},
|
| 165 |
-
{
|
| 166 |
-
"step": 4800,
|
| 167 |
-
"train_loss": 0.04912750422954559,
|
| 168 |
-
"val_loss": 0.17699928283691407,
|
| 169 |
-
"train_l0": 3.518986701965332,
|
| 170 |
-
"val_l0": 3.6074113845825195
|
| 171 |
-
},
|
| 172 |
-
{
|
| 173 |
-
"step": 5000,
|
| 174 |
-
"train_loss": 0.16472768783569336,
|
| 175 |
-
"val_loss": 0.23595848083496093,
|
| 176 |
-
"train_l0": 3.7324130535125732,
|
| 177 |
-
"val_l0": 3.978390693664551
|
| 178 |
-
}
|
| 179 |
-
],
|
| 180 |
-
"summary": {
|
| 181 |
-
"dead_features": 0,
|
| 182 |
-
"dead_pct": 0.0,
|
| 183 |
-
"final_l0_pct": 3.7324130535125732
|
| 184 |
-
}
|
| 185 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
metrics/metrics_L11.json
DELETED
|
@@ -1,185 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"layer": 11,
|
| 3 |
-
"metrics": [
|
| 4 |
-
{
|
| 5 |
-
"step": 200,
|
| 6 |
-
"train_loss": 0.09921978414058685,
|
| 7 |
-
"val_loss": 0.1084686279296875,
|
| 8 |
-
"train_l0": 6.2422215938568115,
|
| 9 |
-
"val_l0": 5.507922172546387
|
| 10 |
-
},
|
| 11 |
-
{
|
| 12 |
-
"step": 400,
|
| 13 |
-
"train_loss": 0.009879800491034985,
|
| 14 |
-
"val_loss": 0.1968353271484375,
|
| 15 |
-
"train_l0": 5.2117228507995605,
|
| 16 |
-
"val_l0": 5.451998710632324
|
| 17 |
-
},
|
| 18 |
-
{
|
| 19 |
-
"step": 600,
|
| 20 |
-
"train_loss": 0.27727043628692627,
|
| 21 |
-
"val_loss": 0.18078689575195311,
|
| 22 |
-
"train_l0": 4.555940628051758,
|
| 23 |
-
"val_l0": 3.7746644020080566
|
| 24 |
-
},
|
| 25 |
-
{
|
| 26 |
-
"step": 800,
|
| 27 |
-
"train_loss": 0.19308589398860931,
|
| 28 |
-
"val_loss": 0.19725723266601564,
|
| 29 |
-
"train_l0": 4.724091291427612,
|
| 30 |
-
"val_l0": 4.179141521453857
|
| 31 |
-
},
|
| 32 |
-
{
|
| 33 |
-
"step": 1000,
|
| 34 |
-
"train_loss": 0.2370215505361557,
|
| 35 |
-
"val_loss": 0.16272201538085937,
|
| 36 |
-
"train_l0": 5.1734983921051025,
|
| 37 |
-
"val_l0": 3.8032984733581543
|
| 38 |
-
},
|
| 39 |
-
{
|
| 40 |
-
"step": 1200,
|
| 41 |
-
"train_loss": 0.24188938736915588,
|
| 42 |
-
"val_loss": 0.23206787109375,
|
| 43 |
-
"train_l0": 4.156309366226196,
|
| 44 |
-
"val_l0": 3.619532585144043
|
| 45 |
-
},
|
| 46 |
-
{
|
| 47 |
-
"step": 1400,
|
| 48 |
-
"train_loss": 0.2095375657081604,
|
| 49 |
-
"val_loss": 0.29433555603027345,
|
| 50 |
-
"train_l0": 3.704559803009033,
|
| 51 |
-
"val_l0": 3.7230396270751953
|
| 52 |
-
},
|
| 53 |
-
{
|
| 54 |
-
"step": 1600,
|
| 55 |
-
"train_loss": 0.20849841833114624,
|
| 56 |
-
"val_loss": 0.15409889221191406,
|
| 57 |
-
"train_l0": 3.4005343914031982,
|
| 58 |
-
"val_l0": 3.734006881713867
|
| 59 |
-
},
|
| 60 |
-
{
|
| 61 |
-
"step": 1800,
|
| 62 |
-
"train_loss": 0.049407750368118286,
|
| 63 |
-
"val_loss": 0.15776596069335938,
|
| 64 |
-
"train_l0": 3.005021810531616,
|
| 65 |
-
"val_l0": 4.101603031158447
|
| 66 |
-
},
|
| 67 |
-
{
|
| 68 |
-
"step": 2000,
|
| 69 |
-
"train_loss": 0.45641109347343445,
|
| 70 |
-
"val_loss": 0.07792243957519532,
|
| 71 |
-
"train_l0": 3.6822736263275146,
|
| 72 |
-
"val_l0": 3.547511100769043
|
| 73 |
-
},
|
| 74 |
-
{
|
| 75 |
-
"step": 2200,
|
| 76 |
-
"train_loss": 0.11361617594957352,
|
| 77 |
-
"val_loss": 0.21431961059570312,
|
| 78 |
-
"train_l0": 4.268193244934082,
|
| 79 |
-
"val_l0": 3.597719669342041
|
| 80 |
-
},
|
| 81 |
-
{
|
| 82 |
-
"step": 2400,
|
| 83 |
-
"train_loss": 0.2159542739391327,
|
| 84 |
-
"val_loss": 0.1529510498046875,
|
| 85 |
-
"train_l0": 3.774815797805786,
|
| 86 |
-
"val_l0": 3.5473108291625977
|
| 87 |
-
},
|
| 88 |
-
{
|
| 89 |
-
"step": 2600,
|
| 90 |
-
"train_loss": 0.22549620270729065,
|
| 91 |
-
"val_loss": 0.19571762084960936,
|
| 92 |
-
"train_l0": 4.037141799926758,
|
| 93 |
-
"val_l0": 4.042267799377441
|
| 94 |
-
},
|
| 95 |
-
{
|
| 96 |
-
"step": 2800,
|
| 97 |
-
"train_loss": 0.37636297941207886,
|
| 98 |
-
"val_loss": 0.20887794494628906,
|
| 99 |
-
"train_l0": 3.8642048835754395,
|
| 100 |
-
"val_l0": 3.6960291862487793
|
| 101 |
-
},
|
| 102 |
-
{
|
| 103 |
-
"step": 3000,
|
| 104 |
-
"train_loss": 0.14850756525993347,
|
| 105 |
-
"val_loss": 0.08110885620117188,
|
| 106 |
-
"train_l0": 3.3547163009643555,
|
| 107 |
-
"val_l0": 3.3356618881225586
|
| 108 |
-
},
|
| 109 |
-
{
|
| 110 |
-
"step": 3200,
|
| 111 |
-
"train_loss": 0.18303973972797394,
|
| 112 |
-
"val_loss": 0.24963340759277344,
|
| 113 |
-
"train_l0": 4.515594244003296,
|
| 114 |
-
"val_l0": 3.8955163955688477
|
| 115 |
-
},
|
| 116 |
-
{
|
| 117 |
-
"step": 3400,
|
| 118 |
-
"train_loss": 0.12862856686115265,
|
| 119 |
-
"val_loss": 0.13225936889648438,
|
| 120 |
-
"train_l0": 3.8597404956817627,
|
| 121 |
-
"val_l0": 3.6971187591552734
|
| 122 |
-
},
|
| 123 |
-
{
|
| 124 |
-
"step": 3600,
|
| 125 |
-
"train_loss": 0.21040086448192596,
|
| 126 |
-
"val_loss": 0.20606117248535155,
|
| 127 |
-
"train_l0": 3.616267442703247,
|
| 128 |
-
"val_l0": 4.238801002502441
|
| 129 |
-
},
|
| 130 |
-
{
|
| 131 |
-
"step": 3800,
|
| 132 |
-
"train_loss": 0.14177893102169037,
|
| 133 |
-
"val_loss": 0.18520660400390626,
|
| 134 |
-
"train_l0": 3.515559434890747,
|
| 135 |
-
"val_l0": 3.57072114944458
|
| 136 |
-
},
|
| 137 |
-
{
|
| 138 |
-
"step": 4000,
|
| 139 |
-
"train_loss": 0.28014224767684937,
|
| 140 |
-
"val_loss": 0.2064342498779297,
|
| 141 |
-
"train_l0": 3.5116195678710938,
|
| 142 |
-
"val_l0": 3.8475394248962402
|
| 143 |
-
},
|
| 144 |
-
{
|
| 145 |
-
"step": 4200,
|
| 146 |
-
"train_loss": 0.18489107489585876,
|
| 147 |
-
"val_loss": 0.14811325073242188,
|
| 148 |
-
"train_l0": 3.6059200763702393,
|
| 149 |
-
"val_l0": 3.4544014930725098
|
| 150 |
-
},
|
| 151 |
-
{
|
| 152 |
-
"step": 4400,
|
| 153 |
-
"train_loss": 0.18512320518493652,
|
| 154 |
-
"val_loss": 0.1354084014892578,
|
| 155 |
-
"train_l0": 3.0777156352996826,
|
| 156 |
-
"val_l0": 3.6293387413024902
|
| 157 |
-
},
|
| 158 |
-
{
|
| 159 |
-
"step": 4600,
|
| 160 |
-
"train_loss": 0.222157821059227,
|
| 161 |
-
"val_loss": 0.15824012756347655,
|
| 162 |
-
"train_l0": 4.075437784194946,
|
| 163 |
-
"val_l0": 3.5176491737365723
|
| 164 |
-
},
|
| 165 |
-
{
|
| 166 |
-
"step": 4800,
|
| 167 |
-
"train_loss": 0.1522722989320755,
|
| 168 |
-
"val_loss": 0.1371082305908203,
|
| 169 |
-
"train_l0": 3.7998318672180176,
|
| 170 |
-
"val_l0": 3.5025715827941895
|
| 171 |
-
},
|
| 172 |
-
{
|
| 173 |
-
"step": 5000,
|
| 174 |
-
"train_loss": 0.2167949080467224,
|
| 175 |
-
"val_loss": 0.1713542938232422,
|
| 176 |
-
"train_l0": 3.375786542892456,
|
| 177 |
-
"val_l0": 3.55027437210083
|
| 178 |
-
}
|
| 179 |
-
],
|
| 180 |
-
"summary": {
|
| 181 |
-
"dead_features": 0,
|
| 182 |
-
"dead_pct": 0.0,
|
| 183 |
-
"final_l0_pct": 3.375786542892456
|
| 184 |
-
}
|
| 185 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
metrics/metrics_L12.json
DELETED
|
@@ -1,185 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"layer": 12,
|
| 3 |
-
"metrics": [
|
| 4 |
-
{
|
| 5 |
-
"step": 200,
|
| 6 |
-
"train_loss": 0.049761027097702026,
|
| 7 |
-
"val_loss": 0.23586273193359375,
|
| 8 |
-
"train_l0": 5.280417203903198,
|
| 9 |
-
"val_l0": 5.052521228790283
|
| 10 |
-
},
|
| 11 |
-
{
|
| 12 |
-
"step": 400,
|
| 13 |
-
"train_loss": 0.06634508073329926,
|
| 14 |
-
"val_loss": 0.3154510498046875,
|
| 15 |
-
"train_l0": 4.09318208694458,
|
| 16 |
-
"val_l0": 5.063436031341553
|
| 17 |
-
},
|
| 18 |
-
{
|
| 19 |
-
"step": 600,
|
| 20 |
-
"train_loss": 0.13330385088920593,
|
| 21 |
-
"val_loss": 0.191497802734375,
|
| 22 |
-
"train_l0": 4.017609357833862,
|
| 23 |
-
"val_l0": 4.022650718688965
|
| 24 |
-
},
|
| 25 |
-
{
|
| 26 |
-
"step": 800,
|
| 27 |
-
"train_loss": 0.21025627851486206,
|
| 28 |
-
"val_loss": 0.23423538208007813,
|
| 29 |
-
"train_l0": 3.789740800857544,
|
| 30 |
-
"val_l0": 4.507575035095215
|
| 31 |
-
},
|
| 32 |
-
{
|
| 33 |
-
"step": 1000,
|
| 34 |
-
"train_loss": 0.20364870131015778,
|
| 35 |
-
"val_loss": 0.23608016967773438,
|
| 36 |
-
"train_l0": 3.9293766021728516,
|
| 37 |
-
"val_l0": 3.7171173095703125
|
| 38 |
-
},
|
| 39 |
-
{
|
| 40 |
-
"step": 1200,
|
| 41 |
-
"train_loss": 0.21887055039405823,
|
| 42 |
-
"val_loss": 0.070147705078125,
|
| 43 |
-
"train_l0": 4.024946689605713,
|
| 44 |
-
"val_l0": 3.698732852935791
|
| 45 |
-
},
|
| 46 |
-
{
|
| 47 |
-
"step": 1400,
|
| 48 |
-
"train_loss": 0.00285144941881299,
|
| 49 |
-
"val_loss": 0.17227325439453126,
|
| 50 |
-
"train_l0": 3.424978256225586,
|
| 51 |
-
"val_l0": 3.398251533508301
|
| 52 |
-
},
|
| 53 |
-
{
|
| 54 |
-
"step": 1600,
|
| 55 |
-
"train_loss": 0.25717073678970337,
|
| 56 |
-
"val_loss": 0.20127410888671876,
|
| 57 |
-
"train_l0": 3.671365976333618,
|
| 58 |
-
"val_l0": 3.7819719314575195
|
| 59 |
-
},
|
| 60 |
-
{
|
| 61 |
-
"step": 1800,
|
| 62 |
-
"train_loss": 0.0787108838558197,
|
| 63 |
-
"val_loss": 0.16406211853027344,
|
| 64 |
-
"train_l0": 3.618347644805908,
|
| 65 |
-
"val_l0": 3.638465404510498
|
| 66 |
-
},
|
| 67 |
-
{
|
| 68 |
-
"step": 2000,
|
| 69 |
-
"train_loss": 0.0804997980594635,
|
| 70 |
-
"val_loss": 0.2432525634765625,
|
| 71 |
-
"train_l0": 3.5273313522338867,
|
| 72 |
-
"val_l0": 3.691577911376953
|
| 73 |
-
},
|
| 74 |
-
{
|
| 75 |
-
"step": 2200,
|
| 76 |
-
"train_loss": 0.16888681054115295,
|
| 77 |
-
"val_loss": 0.05645675659179687,
|
| 78 |
-
"train_l0": 3.3850371837615967,
|
| 79 |
-
"val_l0": 3.2377266883850098
|
| 80 |
-
},
|
| 81 |
-
{
|
| 82 |
-
"step": 2400,
|
| 83 |
-
"train_loss": 0.05746965855360031,
|
| 84 |
-
"val_loss": 0.21165084838867188,
|
| 85 |
-
"train_l0": 3.18300724029541,
|
| 86 |
-
"val_l0": 3.4363770484924316
|
| 87 |
-
},
|
| 88 |
-
{
|
| 89 |
-
"step": 2600,
|
| 90 |
-
"train_loss": 0.07457253336906433,
|
| 91 |
-
"val_loss": 0.11435813903808593,
|
| 92 |
-
"train_l0": 3.731149435043335,
|
| 93 |
-
"val_l0": 3.5162997245788574
|
| 94 |
-
},
|
| 95 |
-
{
|
| 96 |
-
"step": 2800,
|
| 97 |
-
"train_loss": 0.18094214797019958,
|
| 98 |
-
"val_loss": 0.14759597778320313,
|
| 99 |
-
"train_l0": 3.5462021827697754,
|
| 100 |
-
"val_l0": 3.5747838020324707
|
| 101 |
-
},
|
| 102 |
-
{
|
| 103 |
-
"step": 3000,
|
| 104 |
-
"train_loss": 0.12367279827594757,
|
| 105 |
-
"val_loss": 0.19666061401367188,
|
| 106 |
-
"train_l0": 3.444838523864746,
|
| 107 |
-
"val_l0": 3.707578182220459
|
| 108 |
-
},
|
| 109 |
-
{
|
| 110 |
-
"step": 3200,
|
| 111 |
-
"train_loss": 0.1919580101966858,
|
| 112 |
-
"val_loss": 0.167431640625,
|
| 113 |
-
"train_l0": 3.6078989505767822,
|
| 114 |
-
"val_l0": 3.445577621459961
|
| 115 |
-
},
|
| 116 |
-
{
|
| 117 |
-
"step": 3400,
|
| 118 |
-
"train_loss": 0.08584243804216385,
|
| 119 |
-
"val_loss": 0.0935699462890625,
|
| 120 |
-
"train_l0": 3.5036087036132812,
|
| 121 |
-
"val_l0": 3.372669219970703
|
| 122 |
-
},
|
| 123 |
-
{
|
| 124 |
-
"step": 3600,
|
| 125 |
-
"train_loss": 0.004086595959961414,
|
| 126 |
-
"val_loss": 0.1779205322265625,
|
| 127 |
-
"train_l0": 2.940523624420166,
|
| 128 |
-
"val_l0": 3.386509418487549
|
| 129 |
-
},
|
| 130 |
-
{
|
| 131 |
-
"step": 3800,
|
| 132 |
-
"train_loss": 0.07857616245746613,
|
| 133 |
-
"val_loss": 0.1642467498779297,
|
| 134 |
-
"train_l0": 3.4715771675109863,
|
| 135 |
-
"val_l0": 4.133481979370117
|
| 136 |
-
},
|
| 137 |
-
{
|
| 138 |
-
"step": 4000,
|
| 139 |
-
"train_loss": 0.18264088034629822,
|
| 140 |
-
"val_loss": 0.15473709106445313,
|
| 141 |
-
"train_l0": 3.339332342147827,
|
| 142 |
-
"val_l0": 3.5804057121276855
|
| 143 |
-
},
|
| 144 |
-
{
|
| 145 |
-
"step": 4200,
|
| 146 |
-
"train_loss": 0.07045550644397736,
|
| 147 |
-
"val_loss": 0.08549880981445312,
|
| 148 |
-
"train_l0": 3.107649087905884,
|
| 149 |
-
"val_l0": 3.382086753845215
|
| 150 |
-
},
|
| 151 |
-
{
|
| 152 |
-
"step": 4400,
|
| 153 |
-
"train_loss": 0.09806112945079803,
|
| 154 |
-
"val_loss": 0.271417236328125,
|
| 155 |
-
"train_l0": 3.3613860607147217,
|
| 156 |
-
"val_l0": 4.061157703399658
|
| 157 |
-
},
|
| 158 |
-
{
|
| 159 |
-
"step": 4600,
|
| 160 |
-
"train_loss": 0.2173023223876953,
|
| 161 |
-
"val_loss": 0.1629016876220703,
|
| 162 |
-
"train_l0": 3.610020875930786,
|
| 163 |
-
"val_l0": 3.5916566848754883
|
| 164 |
-
},
|
| 165 |
-
{
|
| 166 |
-
"step": 4800,
|
| 167 |
-
"train_loss": 0.059717610478401184,
|
| 168 |
-
"val_loss": 0.0760498046875,
|
| 169 |
-
"train_l0": 3.1996190547943115,
|
| 170 |
-
"val_l0": 3.4174418449401855
|
| 171 |
-
},
|
| 172 |
-
{
|
| 173 |
-
"step": 5000,
|
| 174 |
-
"train_loss": 0.06465455144643784,
|
| 175 |
-
"val_loss": 0.109918212890625,
|
| 176 |
-
"train_l0": 3.100919723510742,
|
| 177 |
-
"val_l0": 3.3136749267578125
|
| 178 |
-
}
|
| 179 |
-
],
|
| 180 |
-
"summary": {
|
| 181 |
-
"dead_features": 0,
|
| 182 |
-
"dead_pct": 0.0,
|
| 183 |
-
"final_l0_pct": 3.100919723510742
|
| 184 |
-
}
|
| 185 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
metrics/metrics_L13.json
DELETED
|
@@ -1,185 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"layer": 13,
|
| 3 |
-
"metrics": [
|
| 4 |
-
{
|
| 5 |
-
"step": 200,
|
| 6 |
-
"train_loss": 0.22395287454128265,
|
| 7 |
-
"val_loss": 0.2162261962890625,
|
| 8 |
-
"train_l0": 6.002742052078247,
|
| 9 |
-
"val_l0": 6.3762688636779785
|
| 10 |
-
},
|
| 11 |
-
{
|
| 12 |
-
"step": 400,
|
| 13 |
-
"train_loss": 0.23106394708156586,
|
| 14 |
-
"val_loss": 0.17551498413085936,
|
| 15 |
-
"train_l0": 4.743325710296631,
|
| 16 |
-
"val_l0": 5.017955303192139
|
| 17 |
-
},
|
| 18 |
-
{
|
| 19 |
-
"step": 600,
|
| 20 |
-
"train_loss": 0.05049969255924225,
|
| 21 |
-
"val_loss": 0.34801483154296875,
|
| 22 |
-
"train_l0": 4.119223356246948,
|
| 23 |
-
"val_l0": 5.083448886871338
|
| 24 |
-
},
|
| 25 |
-
{
|
| 26 |
-
"step": 800,
|
| 27 |
-
"train_loss": 0.258017361164093,
|
| 28 |
-
"val_loss": 0.27061996459960935,
|
| 29 |
-
"train_l0": 4.910182952880859,
|
| 30 |
-
"val_l0": 4.004611968994141
|
| 31 |
-
},
|
| 32 |
-
{
|
| 33 |
-
"step": 1000,
|
| 34 |
-
"train_loss": 0.007300457917153835,
|
| 35 |
-
"val_loss": 0.06269607543945313,
|
| 36 |
-
"train_l0": 4.506659507751465,
|
| 37 |
-
"val_l0": 4.057683944702148
|
| 38 |
-
},
|
| 39 |
-
{
|
| 40 |
-
"step": 1200,
|
| 41 |
-
"train_loss": 0.0030020377598702908,
|
| 42 |
-
"val_loss": 0.15885467529296876,
|
| 43 |
-
"train_l0": 4.197508096694946,
|
| 44 |
-
"val_l0": 4.219772815704346
|
| 45 |
-
},
|
| 46 |
-
{
|
| 47 |
-
"step": 1400,
|
| 48 |
-
"train_loss": 0.23780347406864166,
|
| 49 |
-
"val_loss": 0.14090652465820314,
|
| 50 |
-
"train_l0": 4.0885210037231445,
|
| 51 |
-
"val_l0": 4.079396724700928
|
| 52 |
-
},
|
| 53 |
-
{
|
| 54 |
-
"step": 1600,
|
| 55 |
-
"train_loss": 0.30215391516685486,
|
| 56 |
-
"val_loss": 0.1140838623046875,
|
| 57 |
-
"train_l0": 4.01490330696106,
|
| 58 |
-
"val_l0": 4.2679572105407715
|
| 59 |
-
},
|
| 60 |
-
{
|
| 61 |
-
"step": 1800,
|
| 62 |
-
"train_loss": 0.19365569949150085,
|
| 63 |
-
"val_loss": 0.21255874633789062,
|
| 64 |
-
"train_l0": 4.0745556354522705,
|
| 65 |
-
"val_l0": 4.438607692718506
|
| 66 |
-
},
|
| 67 |
-
{
|
| 68 |
-
"step": 2000,
|
| 69 |
-
"train_loss": 0.13495495915412903,
|
| 70 |
-
"val_loss": 0.08930130004882812,
|
| 71 |
-
"train_l0": 4.057210683822632,
|
| 72 |
-
"val_l0": 4.2247819900512695
|
| 73 |
-
},
|
| 74 |
-
{
|
| 75 |
-
"step": 2200,
|
| 76 |
-
"train_loss": 0.17033696174621582,
|
| 77 |
-
"val_loss": 0.229937744140625,
|
| 78 |
-
"train_l0": 4.224717617034912,
|
| 79 |
-
"val_l0": 4.258725643157959
|
| 80 |
-
},
|
| 81 |
-
{
|
| 82 |
-
"step": 2400,
|
| 83 |
-
"train_loss": 0.21462956070899963,
|
| 84 |
-
"val_loss": 0.060570907592773435,
|
| 85 |
-
"train_l0": 4.026174545288086,
|
| 86 |
-
"val_l0": 4.24569845199585
|
| 87 |
-
},
|
| 88 |
-
{
|
| 89 |
-
"step": 2600,
|
| 90 |
-
"train_loss": 0.19176000356674194,
|
| 91 |
-
"val_loss": 0.17860031127929688,
|
| 92 |
-
"train_l0": 4.244852066040039,
|
| 93 |
-
"val_l0": 4.014825820922852
|
| 94 |
-
},
|
| 95 |
-
{
|
| 96 |
-
"step": 2800,
|
| 97 |
-
"train_loss": 0.0026799733750522137,
|
| 98 |
-
"val_loss": 0.051315689086914064,
|
| 99 |
-
"train_l0": 3.9683520793914795,
|
| 100 |
-
"val_l0": 4.107019901275635
|
| 101 |
-
},
|
| 102 |
-
{
|
| 103 |
-
"step": 3000,
|
| 104 |
-
"train_loss": 0.08866433054208755,
|
| 105 |
-
"val_loss": 0.17926101684570311,
|
| 106 |
-
"train_l0": 3.4518003463745117,
|
| 107 |
-
"val_l0": 3.758378028869629
|
| 108 |
-
},
|
| 109 |
-
{
|
| 110 |
-
"step": 3200,
|
| 111 |
-
"train_loss": 0.040845200419425964,
|
| 112 |
-
"val_loss": 0.04536628723144531,
|
| 113 |
-
"train_l0": 3.974735736846924,
|
| 114 |
-
"val_l0": 3.6425113677978516
|
| 115 |
-
},
|
| 116 |
-
{
|
| 117 |
-
"step": 3400,
|
| 118 |
-
"train_loss": 0.16504868865013123,
|
| 119 |
-
"val_loss": 0.18426551818847656,
|
| 120 |
-
"train_l0": 3.9739787578582764,
|
| 121 |
-
"val_l0": 4.038228988647461
|
| 122 |
-
},
|
| 123 |
-
{
|
| 124 |
-
"step": 3600,
|
| 125 |
-
"train_loss": 0.156017005443573,
|
| 126 |
-
"val_loss": 0.15966224670410156,
|
| 127 |
-
"train_l0": 3.950601816177368,
|
| 128 |
-
"val_l0": 4.000825881958008
|
| 129 |
-
},
|
| 130 |
-
{
|
| 131 |
-
"step": 3800,
|
| 132 |
-
"train_loss": 0.08060595393180847,
|
| 133 |
-
"val_loss": 0.16868095397949218,
|
| 134 |
-
"train_l0": 4.025626182556152,
|
| 135 |
-
"val_l0": 3.8591480255126953
|
| 136 |
-
},
|
| 137 |
-
{
|
| 138 |
-
"step": 4000,
|
| 139 |
-
"train_loss": 0.16479790210723877,
|
| 140 |
-
"val_loss": 0.15739707946777343,
|
| 141 |
-
"train_l0": 3.9172887802124023,
|
| 142 |
-
"val_l0": 4.119517803192139
|
| 143 |
-
},
|
| 144 |
-
{
|
| 145 |
-
"step": 4200,
|
| 146 |
-
"train_loss": 0.1623796820640564,
|
| 147 |
-
"val_loss": 0.15610275268554688,
|
| 148 |
-
"train_l0": 4.180437326431274,
|
| 149 |
-
"val_l0": 4.0860772132873535
|
| 150 |
-
},
|
| 151 |
-
{
|
| 152 |
-
"step": 4400,
|
| 153 |
-
"train_loss": 0.14870871603488922,
|
| 154 |
-
"val_loss": 0.11494178771972656,
|
| 155 |
-
"train_l0": 3.824734687805176,
|
| 156 |
-
"val_l0": 3.9145278930664062
|
| 157 |
-
},
|
| 158 |
-
{
|
| 159 |
-
"step": 4600,
|
| 160 |
-
"train_loss": 0.1420278698205948,
|
| 161 |
-
"val_loss": 0.07527694702148438,
|
| 162 |
-
"train_l0": 3.898346424102783,
|
| 163 |
-
"val_l0": 3.617892265319824
|
| 164 |
-
},
|
| 165 |
-
{
|
| 166 |
-
"step": 4800,
|
| 167 |
-
"train_loss": 0.15156535804271698,
|
| 168 |
-
"val_loss": 0.12783889770507811,
|
| 169 |
-
"train_l0": 3.684896230697632,
|
| 170 |
-
"val_l0": 3.678267002105713
|
| 171 |
-
},
|
| 172 |
-
{
|
| 173 |
-
"step": 5000,
|
| 174 |
-
"train_loss": 0.1299048811197281,
|
| 175 |
-
"val_loss": 0.11329269409179688,
|
| 176 |
-
"train_l0": 4.597461223602295,
|
| 177 |
-
"val_l0": 3.92117977142334
|
| 178 |
-
}
|
| 179 |
-
],
|
| 180 |
-
"summary": {
|
| 181 |
-
"dead_features": 0,
|
| 182 |
-
"dead_pct": 0.0,
|
| 183 |
-
"final_l0_pct": 4.597461223602295
|
| 184 |
-
}
|
| 185 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
metrics/metrics_L14.json
DELETED
|
@@ -1,185 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"layer": 14,
|
| 3 |
-
"metrics": [
|
| 4 |
-
{
|
| 5 |
-
"step": 200,
|
| 6 |
-
"train_loss": 0.2268056422472,
|
| 7 |
-
"val_loss": 0.2205352783203125,
|
| 8 |
-
"train_l0": 5.908143520355225,
|
| 9 |
-
"val_l0": 5.960733890533447
|
| 10 |
-
},
|
| 11 |
-
{
|
| 12 |
-
"step": 400,
|
| 13 |
-
"train_loss": 0.3876601755619049,
|
| 14 |
-
"val_loss": 0.136370849609375,
|
| 15 |
-
"train_l0": 5.088132619857788,
|
| 16 |
-
"val_l0": 5.3023481369018555
|
| 17 |
-
},
|
| 18 |
-
{
|
| 19 |
-
"step": 600,
|
| 20 |
-
"train_loss": 0.05355757474899292,
|
| 21 |
-
"val_loss": 0.24253158569335936,
|
| 22 |
-
"train_l0": 5.041038990020752,
|
| 23 |
-
"val_l0": 4.485607147216797
|
| 24 |
-
},
|
| 25 |
-
{
|
| 26 |
-
"step": 800,
|
| 27 |
-
"train_loss": 0.07663963735103607,
|
| 28 |
-
"val_loss": 0.17482070922851561,
|
| 29 |
-
"train_l0": 4.319339990615845,
|
| 30 |
-
"val_l0": 4.566833972930908
|
| 31 |
-
},
|
| 32 |
-
{
|
| 33 |
-
"step": 1000,
|
| 34 |
-
"train_loss": 0.17825694382190704,
|
| 35 |
-
"val_loss": 0.21924819946289062,
|
| 36 |
-
"train_l0": 4.110926389694214,
|
| 37 |
-
"val_l0": 4.370577335357666
|
| 38 |
-
},
|
| 39 |
-
{
|
| 40 |
-
"step": 1200,
|
| 41 |
-
"train_loss": 0.3518032431602478,
|
| 42 |
-
"val_loss": 0.22976531982421874,
|
| 43 |
-
"train_l0": 4.49526309967041,
|
| 44 |
-
"val_l0": 4.418649673461914
|
| 45 |
-
},
|
| 46 |
-
{
|
| 47 |
-
"step": 1400,
|
| 48 |
-
"train_loss": 0.003465613117441535,
|
| 49 |
-
"val_loss": 0.06223297119140625,
|
| 50 |
-
"train_l0": 4.507356882095337,
|
| 51 |
-
"val_l0": 4.193181991577148
|
| 52 |
-
},
|
| 53 |
-
{
|
| 54 |
-
"step": 1600,
|
| 55 |
-
"train_loss": 0.12616412341594696,
|
| 56 |
-
"val_loss": 0.039304351806640624,
|
| 57 |
-
"train_l0": 4.681652784347534,
|
| 58 |
-
"val_l0": 4.043710231781006
|
| 59 |
-
},
|
| 60 |
-
{
|
| 61 |
-
"step": 1800,
|
| 62 |
-
"train_loss": 0.1261879801750183,
|
| 63 |
-
"val_loss": 0.23475570678710939,
|
| 64 |
-
"train_l0": 3.8393139839172363,
|
| 65 |
-
"val_l0": 4.266026020050049
|
| 66 |
-
},
|
| 67 |
-
{
|
| 68 |
-
"step": 2000,
|
| 69 |
-
"train_loss": 0.2797168493270874,
|
| 70 |
-
"val_loss": 0.21379547119140624,
|
| 71 |
-
"train_l0": 4.066145420074463,
|
| 72 |
-
"val_l0": 4.256992340087891
|
| 73 |
-
},
|
| 74 |
-
{
|
| 75 |
-
"step": 2200,
|
| 76 |
-
"train_loss": 0.04294784367084503,
|
| 77 |
-
"val_loss": 0.18767662048339845,
|
| 78 |
-
"train_l0": 3.8974225521087646,
|
| 79 |
-
"val_l0": 4.089410305023193
|
| 80 |
-
},
|
| 81 |
-
{
|
| 82 |
-
"step": 2400,
|
| 83 |
-
"train_loss": 0.14952577650547028,
|
| 84 |
-
"val_loss": 0.25541229248046876,
|
| 85 |
-
"train_l0": 3.974062204360962,
|
| 86 |
-
"val_l0": 4.064152240753174
|
| 87 |
-
},
|
| 88 |
-
{
|
| 89 |
-
"step": 2600,
|
| 90 |
-
"train_loss": 0.18908026814460754,
|
| 91 |
-
"val_loss": 0.09503936767578125,
|
| 92 |
-
"train_l0": 4.12212610244751,
|
| 93 |
-
"val_l0": 3.885359764099121
|
| 94 |
-
},
|
| 95 |
-
{
|
| 96 |
-
"step": 2800,
|
| 97 |
-
"train_loss": 0.026669470593333244,
|
| 98 |
-
"val_loss": 0.23632049560546875,
|
| 99 |
-
"train_l0": 3.7041962146759033,
|
| 100 |
-
"val_l0": 4.14576530456543
|
| 101 |
-
},
|
| 102 |
-
{
|
| 103 |
-
"step": 3000,
|
| 104 |
-
"train_loss": 0.10926380008459091,
|
| 105 |
-
"val_loss": 0.1588043212890625,
|
| 106 |
-
"train_l0": 4.155206680297852,
|
| 107 |
-
"val_l0": 4.273812770843506
|
| 108 |
-
},
|
| 109 |
-
{
|
| 110 |
-
"step": 3200,
|
| 111 |
-
"train_loss": 0.07134613394737244,
|
| 112 |
-
"val_loss": 0.18201751708984376,
|
| 113 |
-
"train_l0": 3.8694679737091064,
|
| 114 |
-
"val_l0": 3.9543890953063965
|
| 115 |
-
},
|
| 116 |
-
{
|
| 117 |
-
"step": 3400,
|
| 118 |
-
"train_loss": 0.1773558259010315,
|
| 119 |
-
"val_loss": 0.11385307312011719,
|
| 120 |
-
"train_l0": 4.206365346908569,
|
| 121 |
-
"val_l0": 3.995187282562256
|
| 122 |
-
},
|
| 123 |
-
{
|
| 124 |
-
"step": 3600,
|
| 125 |
-
"train_loss": 0.10871031135320663,
|
| 126 |
-
"val_loss": 0.1764965057373047,
|
| 127 |
-
"train_l0": 3.9139270782470703,
|
| 128 |
-
"val_l0": 4.0827155113220215
|
| 129 |
-
},
|
| 130 |
-
{
|
| 131 |
-
"step": 3800,
|
| 132 |
-
"train_loss": 0.14885877072811127,
|
| 133 |
-
"val_loss": 0.17030410766601561,
|
| 134 |
-
"train_l0": 4.062902927398682,
|
| 135 |
-
"val_l0": 4.158968925476074
|
| 136 |
-
},
|
| 137 |
-
{
|
| 138 |
-
"step": 4000,
|
| 139 |
-
"train_loss": 0.11427878588438034,
|
| 140 |
-
"val_loss": 0.2960807800292969,
|
| 141 |
-
"train_l0": 4.0442705154418945,
|
| 142 |
-
"val_l0": 4.237833023071289
|
| 143 |
-
},
|
| 144 |
-
{
|
| 145 |
-
"step": 4200,
|
| 146 |
-
"train_loss": 0.2316785454750061,
|
| 147 |
-
"val_loss": 0.13748703002929688,
|
| 148 |
-
"train_l0": 3.6500871181488037,
|
| 149 |
-
"val_l0": 3.6406779289245605
|
| 150 |
-
},
|
| 151 |
-
{
|
| 152 |
-
"step": 4400,
|
| 153 |
-
"train_loss": 0.05487024039030075,
|
| 154 |
-
"val_loss": 0.30532073974609375,
|
| 155 |
-
"train_l0": 3.9902210235595703,
|
| 156 |
-
"val_l0": 4.303326606750488
|
| 157 |
-
},
|
| 158 |
-
{
|
| 159 |
-
"step": 4600,
|
| 160 |
-
"train_loss": 0.07683736085891724,
|
| 161 |
-
"val_loss": 0.11914253234863281,
|
| 162 |
-
"train_l0": 3.806394338607788,
|
| 163 |
-
"val_l0": 3.947298526763916
|
| 164 |
-
},
|
| 165 |
-
{
|
| 166 |
-
"step": 4800,
|
| 167 |
-
"train_loss": 0.028781507164239883,
|
| 168 |
-
"val_loss": 0.13880653381347657,
|
| 169 |
-
"train_l0": 3.7544071674346924,
|
| 170 |
-
"val_l0": 3.7887930870056152
|
| 171 |
-
},
|
| 172 |
-
{
|
| 173 |
-
"step": 5000,
|
| 174 |
-
"train_loss": 0.1085590049624443,
|
| 175 |
-
"val_loss": 0.16082077026367186,
|
| 176 |
-
"train_l0": 3.9828717708587646,
|
| 177 |
-
"val_l0": 4.116208553314209
|
| 178 |
-
}
|
| 179 |
-
],
|
| 180 |
-
"summary": {
|
| 181 |
-
"dead_features": 0,
|
| 182 |
-
"dead_pct": 0.0,
|
| 183 |
-
"final_l0_pct": 3.9828717708587646
|
| 184 |
-
}
|
| 185 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
metrics/metrics_L15.json
DELETED
|
@@ -1,185 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"layer": 15,
|
| 3 |
-
"metrics": [
|
| 4 |
-
{
|
| 5 |
-
"step": 200,
|
| 6 |
-
"train_loss": 0.18859589099884033,
|
| 7 |
-
"val_loss": 0.38094482421875,
|
| 8 |
-
"train_l0": 6.112903356552124,
|
| 9 |
-
"val_l0": 7.063748836517334
|
| 10 |
-
},
|
| 11 |
-
{
|
| 12 |
-
"step": 400,
|
| 13 |
-
"train_loss": 0.12662295997142792,
|
| 14 |
-
"val_loss": 0.24796295166015625,
|
| 15 |
-
"train_l0": 6.68032169342041,
|
| 16 |
-
"val_l0": 6.632809638977051
|
| 17 |
-
},
|
| 18 |
-
{
|
| 19 |
-
"step": 600,
|
| 20 |
-
"train_loss": 0.12376363575458527,
|
| 21 |
-
"val_loss": 0.12600250244140626,
|
| 22 |
-
"train_l0": 5.3765058517456055,
|
| 23 |
-
"val_l0": 5.7402729988098145
|
| 24 |
-
},
|
| 25 |
-
{
|
| 26 |
-
"step": 800,
|
| 27 |
-
"train_loss": 0.12376091629266739,
|
| 28 |
-
"val_loss": 0.11396255493164062,
|
| 29 |
-
"train_l0": 5.408501625061035,
|
| 30 |
-
"val_l0": 5.558779239654541
|
| 31 |
-
},
|
| 32 |
-
{
|
| 33 |
-
"step": 1000,
|
| 34 |
-
"train_loss": 0.15942886471748352,
|
| 35 |
-
"val_loss": 0.20974884033203126,
|
| 36 |
-
"train_l0": 5.766826868057251,
|
| 37 |
-
"val_l0": 4.799845218658447
|
| 38 |
-
},
|
| 39 |
-
{
|
| 40 |
-
"step": 1200,
|
| 41 |
-
"train_loss": 0.2195884734392166,
|
| 42 |
-
"val_loss": 0.2046783447265625,
|
| 43 |
-
"train_l0": 4.670381546020508,
|
| 44 |
-
"val_l0": 4.87600564956665
|
| 45 |
-
},
|
| 46 |
-
{
|
| 47 |
-
"step": 1400,
|
| 48 |
-
"train_loss": 0.1488950401544571,
|
| 49 |
-
"val_loss": 0.19315032958984374,
|
| 50 |
-
"train_l0": 5.135905742645264,
|
| 51 |
-
"val_l0": 5.232398509979248
|
| 52 |
-
},
|
| 53 |
-
{
|
| 54 |
-
"step": 1600,
|
| 55 |
-
"train_loss": 0.16407719254493713,
|
| 56 |
-
"val_loss": 0.2600120544433594,
|
| 57 |
-
"train_l0": 4.8685431480407715,
|
| 58 |
-
"val_l0": 4.9507832527160645
|
| 59 |
-
},
|
| 60 |
-
{
|
| 61 |
-
"step": 1800,
|
| 62 |
-
"train_loss": 0.28617897629737854,
|
| 63 |
-
"val_loss": 0.117840576171875,
|
| 64 |
-
"train_l0": 4.916584491729736,
|
| 65 |
-
"val_l0": 4.805908203125
|
| 66 |
-
},
|
| 67 |
-
{
|
| 68 |
-
"step": 2000,
|
| 69 |
-
"train_loss": 0.26199525594711304,
|
| 70 |
-
"val_loss": 0.23673477172851562,
|
| 71 |
-
"train_l0": 4.759585857391357,
|
| 72 |
-
"val_l0": 5.2588605880737305
|
| 73 |
-
},
|
| 74 |
-
{
|
| 75 |
-
"step": 2200,
|
| 76 |
-
"train_loss": 0.2180166393518448,
|
| 77 |
-
"val_loss": 0.05423583984375,
|
| 78 |
-
"train_l0": 5.22952675819397,
|
| 79 |
-
"val_l0": 5.001626014709473
|
| 80 |
-
},
|
| 81 |
-
{
|
| 82 |
-
"step": 2400,
|
| 83 |
-
"train_loss": 0.0030850819312036037,
|
| 84 |
-
"val_loss": 0.11976394653320313,
|
| 85 |
-
"train_l0": 4.7512829303741455,
|
| 86 |
-
"val_l0": 5.086400508880615
|
| 87 |
-
},
|
| 88 |
-
{
|
| 89 |
-
"step": 2600,
|
| 90 |
-
"train_loss": 0.09626377373933792,
|
| 91 |
-
"val_loss": 0.20292587280273439,
|
| 92 |
-
"train_l0": 5.210590362548828,
|
| 93 |
-
"val_l0": 4.75818395614624
|
| 94 |
-
},
|
| 95 |
-
{
|
| 96 |
-
"step": 2800,
|
| 97 |
-
"train_loss": 0.05881235748529434,
|
| 98 |
-
"val_loss": 0.14430694580078124,
|
| 99 |
-
"train_l0": 4.784178733825684,
|
| 100 |
-
"val_l0": 4.971439838409424
|
| 101 |
-
},
|
| 102 |
-
{
|
| 103 |
-
"step": 3000,
|
| 104 |
-
"train_loss": 0.1566220223903656,
|
| 105 |
-
"val_loss": 0.1758941650390625,
|
| 106 |
-
"train_l0": 5.287277698516846,
|
| 107 |
-
"val_l0": 4.890756607055664
|
| 108 |
-
},
|
| 109 |
-
{
|
| 110 |
-
"step": 3200,
|
| 111 |
-
"train_loss": 0.1553994119167328,
|
| 112 |
-
"val_loss": 0.1737762451171875,
|
| 113 |
-
"train_l0": 4.770141839981079,
|
| 114 |
-
"val_l0": 4.963803291320801
|
| 115 |
-
},
|
| 116 |
-
{
|
| 117 |
-
"step": 3400,
|
| 118 |
-
"train_loss": 0.25802457332611084,
|
| 119 |
-
"val_loss": 0.15414581298828126,
|
| 120 |
-
"train_l0": 4.83241081237793,
|
| 121 |
-
"val_l0": 4.572384357452393
|
| 122 |
-
},
|
| 123 |
-
{
|
| 124 |
-
"step": 3600,
|
| 125 |
-
"train_loss": 0.09250403195619583,
|
| 126 |
-
"val_loss": 0.17729110717773439,
|
| 127 |
-
"train_l0": 5.365818738937378,
|
| 128 |
-
"val_l0": 4.858424663543701
|
| 129 |
-
},
|
| 130 |
-
{
|
| 131 |
-
"step": 3800,
|
| 132 |
-
"train_loss": 0.17442752420902252,
|
| 133 |
-
"val_loss": 0.212823486328125,
|
| 134 |
-
"train_l0": 5.365270376205444,
|
| 135 |
-
"val_l0": 5.093202590942383
|
| 136 |
-
},
|
| 137 |
-
{
|
| 138 |
-
"step": 4000,
|
| 139 |
-
"train_loss": 0.3724280595779419,
|
| 140 |
-
"val_loss": 0.12553634643554687,
|
| 141 |
-
"train_l0": 5.029004812240601,
|
| 142 |
-
"val_l0": 4.84776496887207
|
| 143 |
-
},
|
| 144 |
-
{
|
| 145 |
-
"step": 4200,
|
| 146 |
-
"train_loss": 0.10034391283988953,
|
| 147 |
-
"val_loss": 0.10605010986328126,
|
| 148 |
-
"train_l0": 4.71007227897644,
|
| 149 |
-
"val_l0": 5.033133029937744
|
| 150 |
-
},
|
| 151 |
-
{
|
| 152 |
-
"step": 4400,
|
| 153 |
-
"train_loss": 0.26655513048171997,
|
| 154 |
-
"val_loss": 0.0655670166015625,
|
| 155 |
-
"train_l0": 4.809176921844482,
|
| 156 |
-
"val_l0": 4.904501438140869
|
| 157 |
-
},
|
| 158 |
-
{
|
| 159 |
-
"step": 4600,
|
| 160 |
-
"train_loss": 0.31244611740112305,
|
| 161 |
-
"val_loss": 0.11882476806640625,
|
| 162 |
-
"train_l0": 5.029934644699097,
|
| 163 |
-
"val_l0": 4.842426776885986
|
| 164 |
-
},
|
| 165 |
-
{
|
| 166 |
-
"step": 4800,
|
| 167 |
-
"train_loss": 0.14197847247123718,
|
| 168 |
-
"val_loss": 0.16082687377929689,
|
| 169 |
-
"train_l0": 4.580521583557129,
|
| 170 |
-
"val_l0": 4.991309642791748
|
| 171 |
-
},
|
| 172 |
-
{
|
| 173 |
-
"step": 5000,
|
| 174 |
-
"train_loss": 0.18068140745162964,
|
| 175 |
-
"val_loss": 0.15989227294921876,
|
| 176 |
-
"train_l0": 4.755055904388428,
|
| 177 |
-
"val_l0": 4.833583831787109
|
| 178 |
-
}
|
| 179 |
-
],
|
| 180 |
-
"summary": {
|
| 181 |
-
"dead_features": 0,
|
| 182 |
-
"dead_pct": 0.0,
|
| 183 |
-
"final_l0_pct": 4.755055904388428
|
| 184 |
-
}
|
| 185 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
metrics/metrics_L16.json
DELETED
|
@@ -1,185 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"layer": 16,
|
| 3 |
-
"metrics": [
|
| 4 |
-
{
|
| 5 |
-
"step": 200,
|
| 6 |
-
"train_loss": 2.0147385597229004,
|
| 7 |
-
"val_loss": 1.483013916015625,
|
| 8 |
-
"train_l0": 8.8844895362854,
|
| 9 |
-
"val_l0": 8.844211101531982
|
| 10 |
-
},
|
| 11 |
-
{
|
| 12 |
-
"step": 400,
|
| 13 |
-
"train_loss": 0.7190955281257629,
|
| 14 |
-
"val_loss": 0.6447174072265625,
|
| 15 |
-
"train_l0": 7.325559854507446,
|
| 16 |
-
"val_l0": 7.473795413970947
|
| 17 |
-
},
|
| 18 |
-
{
|
| 19 |
-
"step": 600,
|
| 20 |
-
"train_loss": 0.43909192085266113,
|
| 21 |
-
"val_loss": 0.7190582275390625,
|
| 22 |
-
"train_l0": 7.045167684555054,
|
| 23 |
-
"val_l0": 7.293331623077393
|
| 24 |
-
},
|
| 25 |
-
{
|
| 26 |
-
"step": 800,
|
| 27 |
-
"train_loss": 0.1277594417333603,
|
| 28 |
-
"val_loss": 1.17286376953125,
|
| 29 |
-
"train_l0": 6.979811191558838,
|
| 30 |
-
"val_l0": 7.326333522796631
|
| 31 |
-
},
|
| 32 |
-
{
|
| 33 |
-
"step": 1000,
|
| 34 |
-
"train_loss": 0.536592423915863,
|
| 35 |
-
"val_loss": 0.3073944091796875,
|
| 36 |
-
"train_l0": 7.271707057952881,
|
| 37 |
-
"val_l0": 7.446353435516357
|
| 38 |
-
},
|
| 39 |
-
{
|
| 40 |
-
"step": 1200,
|
| 41 |
-
"train_loss": 0.43519431352615356,
|
| 42 |
-
"val_loss": 0.53253173828125,
|
| 43 |
-
"train_l0": 7.412272691726685,
|
| 44 |
-
"val_l0": 7.189347743988037
|
| 45 |
-
},
|
| 46 |
-
{
|
| 47 |
-
"step": 1400,
|
| 48 |
-
"train_loss": 0.9889852404594421,
|
| 49 |
-
"val_loss": 0.333416748046875,
|
| 50 |
-
"train_l0": 7.545489072799683,
|
| 51 |
-
"val_l0": 7.190375328063965
|
| 52 |
-
},
|
| 53 |
-
{
|
| 54 |
-
"step": 1600,
|
| 55 |
-
"train_loss": 0.35724973678588867,
|
| 56 |
-
"val_loss": 0.5863739013671875,
|
| 57 |
-
"train_l0": 7.315361499786377,
|
| 58 |
-
"val_l0": 7.307224273681641
|
| 59 |
-
},
|
| 60 |
-
{
|
| 61 |
-
"step": 1800,
|
| 62 |
-
"train_loss": 0.6714401245117188,
|
| 63 |
-
"val_loss": 0.09823760986328126,
|
| 64 |
-
"train_l0": 6.85620903968811,
|
| 65 |
-
"val_l0": 6.532306671142578
|
| 66 |
-
},
|
| 67 |
-
{
|
| 68 |
-
"step": 2000,
|
| 69 |
-
"train_loss": 0.1753266155719757,
|
| 70 |
-
"val_loss": 0.565533447265625,
|
| 71 |
-
"train_l0": 6.568515300750732,
|
| 72 |
-
"val_l0": 7.1772027015686035
|
| 73 |
-
},
|
| 74 |
-
{
|
| 75 |
-
"step": 2200,
|
| 76 |
-
"train_loss": 0.07969269901514053,
|
| 77 |
-
"val_loss": 0.5308624267578125,
|
| 78 |
-
"train_l0": 6.443041563034058,
|
| 79 |
-
"val_l0": 7.251532077789307
|
| 80 |
-
},
|
| 81 |
-
{
|
| 82 |
-
"step": 2400,
|
| 83 |
-
"train_loss": 0.589357852935791,
|
| 84 |
-
"val_loss": 0.402227783203125,
|
| 85 |
-
"train_l0": 7.125425338745117,
|
| 86 |
-
"val_l0": 6.952426433563232
|
| 87 |
-
},
|
| 88 |
-
{
|
| 89 |
-
"step": 2600,
|
| 90 |
-
"train_loss": 0.612260103225708,
|
| 91 |
-
"val_loss": 0.501318359375,
|
| 92 |
-
"train_l0": 7.279670238494873,
|
| 93 |
-
"val_l0": 7.028241157531738
|
| 94 |
-
},
|
| 95 |
-
{
|
| 96 |
-
"step": 2800,
|
| 97 |
-
"train_loss": 0.03864092379808426,
|
| 98 |
-
"val_loss": 0.07253875732421874,
|
| 99 |
-
"train_l0": 7.043129205703735,
|
| 100 |
-
"val_l0": 7.216846942901611
|
| 101 |
-
},
|
| 102 |
-
{
|
| 103 |
-
"step": 3000,
|
| 104 |
-
"train_loss": 0.04911990463733673,
|
| 105 |
-
"val_loss": 0.29129486083984374,
|
| 106 |
-
"train_l0": 6.400692462921143,
|
| 107 |
-
"val_l0": 7.224123477935791
|
| 108 |
-
},
|
| 109 |
-
{
|
| 110 |
-
"step": 3200,
|
| 111 |
-
"train_loss": 0.4463154673576355,
|
| 112 |
-
"val_loss": 0.453875732421875,
|
| 113 |
-
"train_l0": 7.75114893913269,
|
| 114 |
-
"val_l0": 7.297854423522949
|
| 115 |
-
},
|
| 116 |
-
{
|
| 117 |
-
"step": 3400,
|
| 118 |
-
"train_loss": 0.29518067836761475,
|
| 119 |
-
"val_loss": 0.43136444091796877,
|
| 120 |
-
"train_l0": 7.203382253646851,
|
| 121 |
-
"val_l0": 7.667086124420166
|
| 122 |
-
},
|
| 123 |
-
{
|
| 124 |
-
"step": 3600,
|
| 125 |
-
"train_loss": 0.3541266918182373,
|
| 126 |
-
"val_loss": 0.304974365234375,
|
| 127 |
-
"train_l0": 7.325237989425659,
|
| 128 |
-
"val_l0": 7.291569709777832
|
| 129 |
-
},
|
| 130 |
-
{
|
| 131 |
-
"step": 3800,
|
| 132 |
-
"train_loss": 0.6822983026504517,
|
| 133 |
-
"val_loss": 0.177850341796875,
|
| 134 |
-
"train_l0": 7.492810487747192,
|
| 135 |
-
"val_l0": 7.257242202758789
|
| 136 |
-
},
|
| 137 |
-
{
|
| 138 |
-
"step": 4000,
|
| 139 |
-
"train_loss": 0.8102273941040039,
|
| 140 |
-
"val_loss": 0.4614349365234375,
|
| 141 |
-
"train_l0": 7.881307601928711,
|
| 142 |
-
"val_l0": 7.399499416351318
|
| 143 |
-
},
|
| 144 |
-
{
|
| 145 |
-
"step": 4200,
|
| 146 |
-
"train_loss": 0.0868353396654129,
|
| 147 |
-
"val_loss": 0.31473388671875,
|
| 148 |
-
"train_l0": 7.5837016105651855,
|
| 149 |
-
"val_l0": 7.386074066162109
|
| 150 |
-
},
|
| 151 |
-
{
|
| 152 |
-
"step": 4400,
|
| 153 |
-
"train_loss": 1.0328900814056396,
|
| 154 |
-
"val_loss": 0.6586456298828125,
|
| 155 |
-
"train_l0": 7.588446140289307,
|
| 156 |
-
"val_l0": 8.125596046447754
|
| 157 |
-
},
|
| 158 |
-
{
|
| 159 |
-
"step": 4600,
|
| 160 |
-
"train_loss": 0.5718325972557068,
|
| 161 |
-
"val_loss": 0.5662017822265625,
|
| 162 |
-
"train_l0": 7.29985237121582,
|
| 163 |
-
"val_l0": 7.754888534545898
|
| 164 |
-
},
|
| 165 |
-
{
|
| 166 |
-
"step": 4800,
|
| 167 |
-
"train_loss": 0.3163934350013733,
|
| 168 |
-
"val_loss": 0.27583770751953124,
|
| 169 |
-
"train_l0": 7.40966796875,
|
| 170 |
-
"val_l0": 7.692563533782959
|
| 171 |
-
},
|
| 172 |
-
{
|
| 173 |
-
"step": 5000,
|
| 174 |
-
"train_loss": 0.03988220542669296,
|
| 175 |
-
"val_loss": 0.34824371337890625,
|
| 176 |
-
"train_l0": 7.601940631866455,
|
| 177 |
-
"val_l0": 7.565097808837891
|
| 178 |
-
}
|
| 179 |
-
],
|
| 180 |
-
"summary": {
|
| 181 |
-
"dead_features": 0,
|
| 182 |
-
"dead_pct": 0.0,
|
| 183 |
-
"final_l0_pct": 7.601940631866455
|
| 184 |
-
}
|
| 185 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
metrics/metrics_L17.json
DELETED
|
@@ -1,185 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"layer": 17,
|
| 3 |
-
"metrics": [
|
| 4 |
-
{
|
| 5 |
-
"step": 200,
|
| 6 |
-
"train_loss": 0.29547742009162903,
|
| 7 |
-
"val_loss": 0.25211181640625,
|
| 8 |
-
"train_l0": 7.947826385498047,
|
| 9 |
-
"val_l0": 8.013341426849365
|
| 10 |
-
},
|
| 11 |
-
{
|
| 12 |
-
"step": 400,
|
| 13 |
-
"train_loss": 0.18066932260990143,
|
| 14 |
-
"val_loss": 0.2115203857421875,
|
| 15 |
-
"train_l0": 7.252681255340576,
|
| 16 |
-
"val_l0": 7.373335361480713
|
| 17 |
-
},
|
| 18 |
-
{
|
| 19 |
-
"step": 600,
|
| 20 |
-
"train_loss": 0.1320885419845581,
|
| 21 |
-
"val_loss": 0.260302734375,
|
| 22 |
-
"train_l0": 7.383096218109131,
|
| 23 |
-
"val_l0": 7.235217094421387
|
| 24 |
-
},
|
| 25 |
-
{
|
| 26 |
-
"step": 800,
|
| 27 |
-
"train_loss": 0.14655840396881104,
|
| 28 |
-
"val_loss": 0.2014129638671875,
|
| 29 |
-
"train_l0": 6.49915337562561,
|
| 30 |
-
"val_l0": 6.597681045532227
|
| 31 |
-
},
|
| 32 |
-
{
|
| 33 |
-
"step": 1000,
|
| 34 |
-
"train_loss": 0.08175850659608841,
|
| 35 |
-
"val_loss": 0.25476531982421874,
|
| 36 |
-
"train_l0": 6.446212530136108,
|
| 37 |
-
"val_l0": 6.738440990447998
|
| 38 |
-
},
|
| 39 |
-
{
|
| 40 |
-
"step": 1200,
|
| 41 |
-
"train_loss": 0.27209439873695374,
|
| 42 |
-
"val_loss": 0.209674072265625,
|
| 43 |
-
"train_l0": 6.320023536682129,
|
| 44 |
-
"val_l0": 6.909973621368408
|
| 45 |
-
},
|
| 46 |
-
{
|
| 47 |
-
"step": 1400,
|
| 48 |
-
"train_loss": 0.6252310276031494,
|
| 49 |
-
"val_loss": 0.1320404052734375,
|
| 50 |
-
"train_l0": 7.282721996307373,
|
| 51 |
-
"val_l0": 6.144618988037109
|
| 52 |
-
},
|
| 53 |
-
{
|
| 54 |
-
"step": 1600,
|
| 55 |
-
"train_loss": 0.004336773417890072,
|
| 56 |
-
"val_loss": 0.17270050048828126,
|
| 57 |
-
"train_l0": 5.782961845397949,
|
| 58 |
-
"val_l0": 6.5926384925842285
|
| 59 |
-
},
|
| 60 |
-
{
|
| 61 |
-
"step": 1800,
|
| 62 |
-
"train_loss": 0.15362975001335144,
|
| 63 |
-
"val_loss": 0.1113372802734375,
|
| 64 |
-
"train_l0": 6.0371458530426025,
|
| 65 |
-
"val_l0": 5.952432155609131
|
| 66 |
-
},
|
| 67 |
-
{
|
| 68 |
-
"step": 2000,
|
| 69 |
-
"train_loss": 0.10350187122821808,
|
| 70 |
-
"val_loss": 0.13460273742675782,
|
| 71 |
-
"train_l0": 5.82546591758728,
|
| 72 |
-
"val_l0": 6.116945743560791
|
| 73 |
-
},
|
| 74 |
-
{
|
| 75 |
-
"step": 2200,
|
| 76 |
-
"train_loss": 0.0679798498749733,
|
| 77 |
-
"val_loss": 0.22560882568359375,
|
| 78 |
-
"train_l0": 5.661338567733765,
|
| 79 |
-
"val_l0": 6.385469436645508
|
| 80 |
-
},
|
| 81 |
-
{
|
| 82 |
-
"step": 2400,
|
| 83 |
-
"train_loss": 0.3262917101383209,
|
| 84 |
-
"val_loss": 0.150030517578125,
|
| 85 |
-
"train_l0": 6.516504287719727,
|
| 86 |
-
"val_l0": 6.351232528686523
|
| 87 |
-
},
|
| 88 |
-
{
|
| 89 |
-
"step": 2600,
|
| 90 |
-
"train_loss": 0.0715227946639061,
|
| 91 |
-
"val_loss": 0.28294525146484373,
|
| 92 |
-
"train_l0": 5.712169408798218,
|
| 93 |
-
"val_l0": 6.372239589691162
|
| 94 |
-
},
|
| 95 |
-
{
|
| 96 |
-
"step": 2800,
|
| 97 |
-
"train_loss": 0.0030827573500573635,
|
| 98 |
-
"val_loss": 0.10286483764648438,
|
| 99 |
-
"train_l0": 5.119132995605469,
|
| 100 |
-
"val_l0": 5.68516731262207
|
| 101 |
-
},
|
| 102 |
-
{
|
| 103 |
-
"step": 3000,
|
| 104 |
-
"train_loss": 0.05433140695095062,
|
| 105 |
-
"val_loss": 0.2620574951171875,
|
| 106 |
-
"train_l0": 5.781108140945435,
|
| 107 |
-
"val_l0": 6.322429180145264
|
| 108 |
-
},
|
| 109 |
-
{
|
| 110 |
-
"step": 3200,
|
| 111 |
-
"train_loss": 0.18326455354690552,
|
| 112 |
-
"val_loss": 0.1171630859375,
|
| 113 |
-
"train_l0": 5.858641862869263,
|
| 114 |
-
"val_l0": 5.607466697692871
|
| 115 |
-
},
|
| 116 |
-
{
|
| 117 |
-
"step": 3400,
|
| 118 |
-
"train_loss": 0.1755848228931427,
|
| 119 |
-
"val_loss": 0.11911773681640625,
|
| 120 |
-
"train_l0": 6.564021110534668,
|
| 121 |
-
"val_l0": 5.932223796844482
|
| 122 |
-
},
|
| 123 |
-
{
|
| 124 |
-
"step": 3600,
|
| 125 |
-
"train_loss": 0.0037277420051395893,
|
| 126 |
-
"val_loss": 0.21335678100585936,
|
| 127 |
-
"train_l0": 5.113232135772705,
|
| 128 |
-
"val_l0": 6.149611473083496
|
| 129 |
-
},
|
| 130 |
-
{
|
| 131 |
-
"step": 3800,
|
| 132 |
-
"train_loss": 0.10639315843582153,
|
| 133 |
-
"val_loss": 0.23836593627929686,
|
| 134 |
-
"train_l0": 6.345129013061523,
|
| 135 |
-
"val_l0": 5.920286178588867
|
| 136 |
-
},
|
| 137 |
-
{
|
| 138 |
-
"step": 4000,
|
| 139 |
-
"train_loss": 0.39258724451065063,
|
| 140 |
-
"val_loss": 0.1180694580078125,
|
| 141 |
-
"train_l0": 6.1448752880096436,
|
| 142 |
-
"val_l0": 5.517294406890869
|
| 143 |
-
},
|
| 144 |
-
{
|
| 145 |
-
"step": 4200,
|
| 146 |
-
"train_loss": 0.419334352016449,
|
| 147 |
-
"val_loss": 0.126666259765625,
|
| 148 |
-
"train_l0": 6.489890813827515,
|
| 149 |
-
"val_l0": 5.608823299407959
|
| 150 |
-
},
|
| 151 |
-
{
|
| 152 |
-
"step": 4400,
|
| 153 |
-
"train_loss": 0.2981112599372864,
|
| 154 |
-
"val_loss": 0.06766433715820312,
|
| 155 |
-
"train_l0": 6.16682767868042,
|
| 156 |
-
"val_l0": 5.496656894683838
|
| 157 |
-
},
|
| 158 |
-
{
|
| 159 |
-
"step": 4600,
|
| 160 |
-
"train_loss": 0.10180480033159256,
|
| 161 |
-
"val_loss": 0.11992645263671875,
|
| 162 |
-
"train_l0": 5.702310800552368,
|
| 163 |
-
"val_l0": 5.680327415466309
|
| 164 |
-
},
|
| 165 |
-
{
|
| 166 |
-
"step": 4800,
|
| 167 |
-
"train_loss": 0.003014140296727419,
|
| 168 |
-
"val_loss": 0.12350234985351563,
|
| 169 |
-
"train_l0": 5.111187696456909,
|
| 170 |
-
"val_l0": 5.841434001922607
|
| 171 |
-
},
|
| 172 |
-
{
|
| 173 |
-
"step": 5000,
|
| 174 |
-
"train_loss": 0.15246543288230896,
|
| 175 |
-
"val_loss": 0.09812698364257813,
|
| 176 |
-
"train_l0": 5.976772308349609,
|
| 177 |
-
"val_l0": 5.278291702270508
|
| 178 |
-
}
|
| 179 |
-
],
|
| 180 |
-
"summary": {
|
| 181 |
-
"dead_features": 0,
|
| 182 |
-
"dead_pct": 0.0,
|
| 183 |
-
"final_l0_pct": 5.976772308349609
|
| 184 |
-
}
|
| 185 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
metrics/metrics_L18.json
DELETED
|
@@ -1,185 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"layer": 18,
|
| 3 |
-
"metrics": [
|
| 4 |
-
{
|
| 5 |
-
"step": 200,
|
| 6 |
-
"train_loss": 0.18153195083141327,
|
| 7 |
-
"val_loss": 0.1036529541015625,
|
| 8 |
-
"train_l0": 9.34842824935913,
|
| 9 |
-
"val_l0": 9.272804260253906
|
| 10 |
-
},
|
| 11 |
-
{
|
| 12 |
-
"step": 400,
|
| 13 |
-
"train_loss": 0.06843207031488419,
|
| 14 |
-
"val_loss": 0.16837005615234374,
|
| 15 |
-
"train_l0": 8.522951602935791,
|
| 16 |
-
"val_l0": 8.68922472000122
|
| 17 |
-
},
|
| 18 |
-
{
|
| 19 |
-
"step": 600,
|
| 20 |
-
"train_loss": 0.12185792624950409,
|
| 21 |
-
"val_loss": 0.084832763671875,
|
| 22 |
-
"train_l0": 8.400404453277588,
|
| 23 |
-
"val_l0": 8.539772033691406
|
| 24 |
-
},
|
| 25 |
-
{
|
| 26 |
-
"step": 800,
|
| 27 |
-
"train_loss": 0.08921658992767334,
|
| 28 |
-
"val_loss": 0.21422805786132812,
|
| 29 |
-
"train_l0": 8.020985126495361,
|
| 30 |
-
"val_l0": 8.099546432495117
|
| 31 |
-
},
|
| 32 |
-
{
|
| 33 |
-
"step": 1000,
|
| 34 |
-
"train_loss": 0.13305523991584778,
|
| 35 |
-
"val_loss": 0.15858001708984376,
|
| 36 |
-
"train_l0": 7.833904027938843,
|
| 37 |
-
"val_l0": 7.949264049530029
|
| 38 |
-
},
|
| 39 |
-
{
|
| 40 |
-
"step": 1200,
|
| 41 |
-
"train_loss": 0.05351705476641655,
|
| 42 |
-
"val_loss": 0.1508392333984375,
|
| 43 |
-
"train_l0": 7.462650537490845,
|
| 44 |
-
"val_l0": 7.575962543487549
|
| 45 |
-
},
|
| 46 |
-
{
|
| 47 |
-
"step": 1400,
|
| 48 |
-
"train_loss": 0.35643118619918823,
|
| 49 |
-
"val_loss": 0.11534194946289063,
|
| 50 |
-
"train_l0": 7.494688034057617,
|
| 51 |
-
"val_l0": 7.154510021209717
|
| 52 |
-
},
|
| 53 |
-
{
|
| 54 |
-
"step": 1600,
|
| 55 |
-
"train_loss": 0.003675672458484769,
|
| 56 |
-
"val_loss": 0.169952392578125,
|
| 57 |
-
"train_l0": 6.1886608600616455,
|
| 58 |
-
"val_l0": 7.263972759246826
|
| 59 |
-
},
|
| 60 |
-
{
|
| 61 |
-
"step": 1800,
|
| 62 |
-
"train_loss": 0.13229785859584808,
|
| 63 |
-
"val_loss": 0.08189620971679687,
|
| 64 |
-
"train_l0": 6.978583335876465,
|
| 65 |
-
"val_l0": 6.645839214324951
|
| 66 |
-
},
|
| 67 |
-
{
|
| 68 |
-
"step": 2000,
|
| 69 |
-
"train_loss": 0.14124521613121033,
|
| 70 |
-
"val_loss": 0.1912933349609375,
|
| 71 |
-
"train_l0": 5.8920323848724365,
|
| 72 |
-
"val_l0": 5.938117504119873
|
| 73 |
-
},
|
| 74 |
-
{
|
| 75 |
-
"step": 2200,
|
| 76 |
-
"train_loss": 0.08672825247049332,
|
| 77 |
-
"val_loss": 0.1457122802734375,
|
| 78 |
-
"train_l0": 6.410008668899536,
|
| 79 |
-
"val_l0": 6.610515117645264
|
| 80 |
-
},
|
| 81 |
-
{
|
| 82 |
-
"step": 2400,
|
| 83 |
-
"train_loss": 0.1326695680618286,
|
| 84 |
-
"val_loss": 0.120147705078125,
|
| 85 |
-
"train_l0": 7.090938091278076,
|
| 86 |
-
"val_l0": 6.916561126708984
|
| 87 |
-
},
|
| 88 |
-
{
|
| 89 |
-
"step": 2600,
|
| 90 |
-
"train_loss": 0.003633265383541584,
|
| 91 |
-
"val_loss": 0.16397628784179688,
|
| 92 |
-
"train_l0": 6.988579034805298,
|
| 93 |
-
"val_l0": 7.21776008605957
|
| 94 |
-
},
|
| 95 |
-
{
|
| 96 |
-
"step": 2800,
|
| 97 |
-
"train_loss": 0.11385974287986755,
|
| 98 |
-
"val_loss": 0.07097244262695312,
|
| 99 |
-
"train_l0": 6.327402591705322,
|
| 100 |
-
"val_l0": 6.191682815551758
|
| 101 |
-
},
|
| 102 |
-
{
|
| 103 |
-
"step": 3000,
|
| 104 |
-
"train_loss": 0.03426986560225487,
|
| 105 |
-
"val_loss": 0.0904296875,
|
| 106 |
-
"train_l0": 6.470334529876709,
|
| 107 |
-
"val_l0": 6.5923333168029785
|
| 108 |
-
},
|
| 109 |
-
{
|
| 110 |
-
"step": 3200,
|
| 111 |
-
"train_loss": 0.05054966360330582,
|
| 112 |
-
"val_loss": 0.17446975708007811,
|
| 113 |
-
"train_l0": 5.808448791503906,
|
| 114 |
-
"val_l0": 6.333284378051758
|
| 115 |
-
},
|
| 116 |
-
{
|
| 117 |
-
"step": 3400,
|
| 118 |
-
"train_loss": 0.07079701125621796,
|
| 119 |
-
"val_loss": 0.12457427978515626,
|
| 120 |
-
"train_l0": 5.410349369049072,
|
| 121 |
-
"val_l0": 6.60409688949585
|
| 122 |
-
},
|
| 123 |
-
{
|
| 124 |
-
"step": 3600,
|
| 125 |
-
"train_loss": 0.002599747385829687,
|
| 126 |
-
"val_loss": 0.047154998779296874,
|
| 127 |
-
"train_l0": 5.90897798538208,
|
| 128 |
-
"val_l0": 6.068098545074463
|
| 129 |
-
},
|
| 130 |
-
{
|
| 131 |
-
"step": 3800,
|
| 132 |
-
"train_loss": 0.07271479815244675,
|
| 133 |
-
"val_loss": 0.13225936889648438,
|
| 134 |
-
"train_l0": 5.689173936843872,
|
| 135 |
-
"val_l0": 6.391794681549072
|
| 136 |
-
},
|
| 137 |
-
{
|
| 138 |
-
"step": 4000,
|
| 139 |
-
"train_loss": 0.05849483609199524,
|
| 140 |
-
"val_loss": 0.14538917541503907,
|
| 141 |
-
"train_l0": 6.572252511978149,
|
| 142 |
-
"val_l0": 6.945149898529053
|
| 143 |
-
},
|
| 144 |
-
{
|
| 145 |
-
"step": 4200,
|
| 146 |
-
"train_loss": 0.0371062308549881,
|
| 147 |
-
"val_loss": 0.11444931030273438,
|
| 148 |
-
"train_l0": 5.965441465377808,
|
| 149 |
-
"val_l0": 7.010195255279541
|
| 150 |
-
},
|
| 151 |
-
{
|
| 152 |
-
"step": 4400,
|
| 153 |
-
"train_loss": 0.043126996606588364,
|
| 154 |
-
"val_loss": 0.0837158203125,
|
| 155 |
-
"train_l0": 6.728255748748779,
|
| 156 |
-
"val_l0": 6.374139785766602
|
| 157 |
-
},
|
| 158 |
-
{
|
| 159 |
-
"step": 4600,
|
| 160 |
-
"train_loss": 0.06577989459037781,
|
| 161 |
-
"val_loss": 0.07768783569335938,
|
| 162 |
-
"train_l0": 6.2803566455841064,
|
| 163 |
-
"val_l0": 6.004130840301514
|
| 164 |
-
},
|
| 165 |
-
{
|
| 166 |
-
"step": 4800,
|
| 167 |
-
"train_loss": 0.13661718368530273,
|
| 168 |
-
"val_loss": 0.08974533081054688,
|
| 169 |
-
"train_l0": 6.767857074737549,
|
| 170 |
-
"val_l0": 6.150918006896973
|
| 171 |
-
},
|
| 172 |
-
{
|
| 173 |
-
"step": 5000,
|
| 174 |
-
"train_loss": 0.02224155329167843,
|
| 175 |
-
"val_loss": 0.08356552124023438,
|
| 176 |
-
"train_l0": 6.570655107498169,
|
| 177 |
-
"val_l0": 6.584842205047607
|
| 178 |
-
}
|
| 179 |
-
],
|
| 180 |
-
"summary": {
|
| 181 |
-
"dead_features": 0,
|
| 182 |
-
"dead_pct": 0.0,
|
| 183 |
-
"final_l0_pct": 6.570655107498169
|
| 184 |
-
}
|
| 185 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
metrics/metrics_L19.json
DELETED
|
@@ -1,185 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"layer": 19,
|
| 3 |
-
"metrics": [
|
| 4 |
-
{
|
| 5 |
-
"step": 200,
|
| 6 |
-
"train_loss": 0.16328153014183044,
|
| 7 |
-
"val_loss": 0.7614501953125,
|
| 8 |
-
"train_l0": 9.410011768341064,
|
| 9 |
-
"val_l0": 9.672513008117676
|
| 10 |
-
},
|
| 11 |
-
{
|
| 12 |
-
"step": 400,
|
| 13 |
-
"train_loss": 1.106956958770752,
|
| 14 |
-
"val_loss": 0.285394287109375,
|
| 15 |
-
"train_l0": 9.058338403701782,
|
| 16 |
-
"val_l0": 8.911561965942383
|
| 17 |
-
},
|
| 18 |
-
{
|
| 19 |
-
"step": 600,
|
| 20 |
-
"train_loss": 0.3582995533943176,
|
| 21 |
-
"val_loss": 0.152655029296875,
|
| 22 |
-
"train_l0": 8.900266885757446,
|
| 23 |
-
"val_l0": 8.851428031921387
|
| 24 |
-
},
|
| 25 |
-
{
|
| 26 |
-
"step": 800,
|
| 27 |
-
"train_loss": 0.3630158007144928,
|
| 28 |
-
"val_loss": 0.3028289794921875,
|
| 29 |
-
"train_l0": 8.60481858253479,
|
| 30 |
-
"val_l0": 8.673374652862549
|
| 31 |
-
},
|
| 32 |
-
{
|
| 33 |
-
"step": 1000,
|
| 34 |
-
"train_loss": 0.24467700719833374,
|
| 35 |
-
"val_loss": 0.3413848876953125,
|
| 36 |
-
"train_l0": 8.545207977294922,
|
| 37 |
-
"val_l0": 8.71147632598877
|
| 38 |
-
},
|
| 39 |
-
{
|
| 40 |
-
"step": 1200,
|
| 41 |
-
"train_loss": 0.08691535890102386,
|
| 42 |
-
"val_loss": 0.1753326416015625,
|
| 43 |
-
"train_l0": 8.676499128341675,
|
| 44 |
-
"val_l0": 8.312342166900635
|
| 45 |
-
},
|
| 46 |
-
{
|
| 47 |
-
"step": 1400,
|
| 48 |
-
"train_loss": 0.3015482425689697,
|
| 49 |
-
"val_loss": 0.229278564453125,
|
| 50 |
-
"train_l0": 8.302879333496094,
|
| 51 |
-
"val_l0": 8.557393550872803
|
| 52 |
-
},
|
| 53 |
-
{
|
| 54 |
-
"step": 1600,
|
| 55 |
-
"train_loss": 0.20741379261016846,
|
| 56 |
-
"val_loss": 0.165313720703125,
|
| 57 |
-
"train_l0": 8.581340312957764,
|
| 58 |
-
"val_l0": 7.655327320098877
|
| 59 |
-
},
|
| 60 |
-
{
|
| 61 |
-
"step": 1800,
|
| 62 |
-
"train_loss": 0.3870965242385864,
|
| 63 |
-
"val_loss": 0.23432464599609376,
|
| 64 |
-
"train_l0": 8.170473575592041,
|
| 65 |
-
"val_l0": 7.911219596862793
|
| 66 |
-
},
|
| 67 |
-
{
|
| 68 |
-
"step": 2000,
|
| 69 |
-
"train_loss": 0.19583956897258759,
|
| 70 |
-
"val_loss": 0.3945159912109375,
|
| 71 |
-
"train_l0": 7.29408860206604,
|
| 72 |
-
"val_l0": 8.134562969207764
|
| 73 |
-
},
|
| 74 |
-
{
|
| 75 |
-
"step": 2200,
|
| 76 |
-
"train_loss": 0.2413194626569748,
|
| 77 |
-
"val_loss": 0.175360107421875,
|
| 78 |
-
"train_l0": 8.293479681015015,
|
| 79 |
-
"val_l0": 7.956058979034424
|
| 80 |
-
},
|
| 81 |
-
{
|
| 82 |
-
"step": 2400,
|
| 83 |
-
"train_loss": 0.235012024641037,
|
| 84 |
-
"val_loss": 0.23008270263671876,
|
| 85 |
-
"train_l0": 7.574540376663208,
|
| 86 |
-
"val_l0": 7.818315029144287
|
| 87 |
-
},
|
| 88 |
-
{
|
| 89 |
-
"step": 2600,
|
| 90 |
-
"train_loss": 0.32752957940101624,
|
| 91 |
-
"val_loss": 0.213995361328125,
|
| 92 |
-
"train_l0": 7.984650135040283,
|
| 93 |
-
"val_l0": 8.027493953704834
|
| 94 |
-
},
|
| 95 |
-
{
|
| 96 |
-
"step": 2800,
|
| 97 |
-
"train_loss": 0.33268076181411743,
|
| 98 |
-
"val_loss": 0.2492584228515625,
|
| 99 |
-
"train_l0": 7.781195640563965,
|
| 100 |
-
"val_l0": 7.636430263519287
|
| 101 |
-
},
|
| 102 |
-
{
|
| 103 |
-
"step": 3000,
|
| 104 |
-
"train_loss": 0.10630679130554199,
|
| 105 |
-
"val_loss": 0.294158935546875,
|
| 106 |
-
"train_l0": 7.874828577041626,
|
| 107 |
-
"val_l0": 7.7915120124816895
|
| 108 |
-
},
|
| 109 |
-
{
|
| 110 |
-
"step": 3200,
|
| 111 |
-
"train_loss": 0.3239853084087372,
|
| 112 |
-
"val_loss": 0.3273040771484375,
|
| 113 |
-
"train_l0": 8.166217803955078,
|
| 114 |
-
"val_l0": 7.872045040130615
|
| 115 |
-
},
|
| 116 |
-
{
|
| 117 |
-
"step": 3400,
|
| 118 |
-
"train_loss": 0.10050178319215775,
|
| 119 |
-
"val_loss": 0.2894775390625,
|
| 120 |
-
"train_l0": 7.126861810684204,
|
| 121 |
-
"val_l0": 8.002984523773193
|
| 122 |
-
},
|
| 123 |
-
{
|
| 124 |
-
"step": 3600,
|
| 125 |
-
"train_loss": 0.2620861530303955,
|
| 126 |
-
"val_loss": 0.2463470458984375,
|
| 127 |
-
"train_l0": 7.320934534072876,
|
| 128 |
-
"val_l0": 7.738144397735596
|
| 129 |
-
},
|
| 130 |
-
{
|
| 131 |
-
"step": 3800,
|
| 132 |
-
"train_loss": 0.1980583518743515,
|
| 133 |
-
"val_loss": 0.14094696044921876,
|
| 134 |
-
"train_l0": 7.894331216812134,
|
| 135 |
-
"val_l0": 7.874355316162109
|
| 136 |
-
},
|
| 137 |
-
{
|
| 138 |
-
"step": 4000,
|
| 139 |
-
"train_loss": 0.3361251950263977,
|
| 140 |
-
"val_loss": 0.10409698486328126,
|
| 141 |
-
"train_l0": 7.724350690841675,
|
| 142 |
-
"val_l0": 7.3462605476379395
|
| 143 |
-
},
|
| 144 |
-
{
|
| 145 |
-
"step": 4200,
|
| 146 |
-
"train_loss": 0.18273675441741943,
|
| 147 |
-
"val_loss": 0.2894775390625,
|
| 148 |
-
"train_l0": 7.522475719451904,
|
| 149 |
-
"val_l0": 7.949066162109375
|
| 150 |
-
},
|
| 151 |
-
{
|
| 152 |
-
"step": 4400,
|
| 153 |
-
"train_loss": 0.055990416556596756,
|
| 154 |
-
"val_loss": 0.2091644287109375,
|
| 155 |
-
"train_l0": 7.712060213088989,
|
| 156 |
-
"val_l0": 8.096511363983154
|
| 157 |
-
},
|
| 158 |
-
{
|
| 159 |
-
"step": 4600,
|
| 160 |
-
"train_loss": 0.2383211851119995,
|
| 161 |
-
"val_loss": 0.24113311767578124,
|
| 162 |
-
"train_l0": 8.246296644210815,
|
| 163 |
-
"val_l0": 8.061425685882568
|
| 164 |
-
},
|
| 165 |
-
{
|
| 166 |
-
"step": 4800,
|
| 167 |
-
"train_loss": 0.12159339338541031,
|
| 168 |
-
"val_loss": 0.23677520751953124,
|
| 169 |
-
"train_l0": 7.3075175285339355,
|
| 170 |
-
"val_l0": 7.9546308517456055
|
| 171 |
-
},
|
| 172 |
-
{
|
| 173 |
-
"step": 5000,
|
| 174 |
-
"train_loss": 0.13588812947273254,
|
| 175 |
-
"val_loss": 0.244696044921875,
|
| 176 |
-
"train_l0": 7.477247714996338,
|
| 177 |
-
"val_l0": 8.267624378204346
|
| 178 |
-
}
|
| 179 |
-
],
|
| 180 |
-
"summary": {
|
| 181 |
-
"dead_features": 0,
|
| 182 |
-
"dead_pct": 0.0,
|
| 183 |
-
"final_l0_pct": 7.477247714996338
|
| 184 |
-
}
|
| 185 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
metrics/metrics_L2.json
DELETED
|
@@ -1,185 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"layer": 2,
|
| 3 |
-
"metrics": [
|
| 4 |
-
{
|
| 5 |
-
"step": 200,
|
| 6 |
-
"train_loss": 1.9761638641357422,
|
| 7 |
-
"val_loss": 3.1903717041015627,
|
| 8 |
-
"train_l0": 6.391435861587524,
|
| 9 |
-
"val_l0": 6.662788391113281
|
| 10 |
-
},
|
| 11 |
-
{
|
| 12 |
-
"step": 400,
|
| 13 |
-
"train_loss": 1.557054042816162,
|
| 14 |
-
"val_loss": 1.2012100219726562,
|
| 15 |
-
"train_l0": 7.713514566421509,
|
| 16 |
-
"val_l0": 6.247289180755615
|
| 17 |
-
},
|
| 18 |
-
{
|
| 19 |
-
"step": 600,
|
| 20 |
-
"train_loss": 0.033284783363342285,
|
| 21 |
-
"val_loss": 1.2727691650390625,
|
| 22 |
-
"train_l0": 6.77381157875061,
|
| 23 |
-
"val_l0": 6.737117767333984
|
| 24 |
-
},
|
| 25 |
-
{
|
| 26 |
-
"step": 800,
|
| 27 |
-
"train_loss": 2.1294851303100586,
|
| 28 |
-
"val_loss": 2.918695068359375,
|
| 29 |
-
"train_l0": 3.5417914390563965,
|
| 30 |
-
"val_l0": 5.333240032196045
|
| 31 |
-
},
|
| 32 |
-
{
|
| 33 |
-
"step": 1000,
|
| 34 |
-
"train_loss": 1.4643634557724,
|
| 35 |
-
"val_loss": 1.5200180053710937,
|
| 36 |
-
"train_l0": 5.513817071914673,
|
| 37 |
-
"val_l0": 5.526895523071289
|
| 38 |
-
},
|
| 39 |
-
{
|
| 40 |
-
"step": 1200,
|
| 41 |
-
"train_loss": 1.1451902389526367,
|
| 42 |
-
"val_loss": 2.2034011840820313,
|
| 43 |
-
"train_l0": 7.441985607147217,
|
| 44 |
-
"val_l0": 6.193974018096924
|
| 45 |
-
},
|
| 46 |
-
{
|
| 47 |
-
"step": 1400,
|
| 48 |
-
"train_loss": 0.3887673616409302,
|
| 49 |
-
"val_loss": 1.6567428588867188,
|
| 50 |
-
"train_l0": 5.183881521224976,
|
| 51 |
-
"val_l0": 5.194427967071533
|
| 52 |
-
},
|
| 53 |
-
{
|
| 54 |
-
"step": 1600,
|
| 55 |
-
"train_loss": 1.1516119241714478,
|
| 56 |
-
"val_loss": 2.549102783203125,
|
| 57 |
-
"train_l0": 6.440049409866333,
|
| 58 |
-
"val_l0": 7.178695201873779
|
| 59 |
-
},
|
| 60 |
-
{
|
| 61 |
-
"step": 1800,
|
| 62 |
-
"train_loss": 1.101319670677185,
|
| 63 |
-
"val_loss": 3.256329345703125,
|
| 64 |
-
"train_l0": 6.867629289627075,
|
| 65 |
-
"val_l0": 6.555483341217041
|
| 66 |
-
},
|
| 67 |
-
{
|
| 68 |
-
"step": 2000,
|
| 69 |
-
"train_loss": 1.2290267944335938,
|
| 70 |
-
"val_loss": 2.0247894287109376,
|
| 71 |
-
"train_l0": 5.375576019287109,
|
| 72 |
-
"val_l0": 6.489918231964111
|
| 73 |
-
},
|
| 74 |
-
{
|
| 75 |
-
"step": 2200,
|
| 76 |
-
"train_loss": 0.4021357297897339,
|
| 77 |
-
"val_loss": 0.9072250366210938,
|
| 78 |
-
"train_l0": 5.079847574234009,
|
| 79 |
-
"val_l0": 6.635992527008057
|
| 80 |
-
},
|
| 81 |
-
{
|
| 82 |
-
"step": 2400,
|
| 83 |
-
"train_loss": 1.7660073041915894,
|
| 84 |
-
"val_loss": 2.47327880859375,
|
| 85 |
-
"train_l0": 4.132765531539917,
|
| 86 |
-
"val_l0": 5.481312274932861
|
| 87 |
-
},
|
| 88 |
-
{
|
| 89 |
-
"step": 2600,
|
| 90 |
-
"train_loss": 2.505723476409912,
|
| 91 |
-
"val_loss": 1.3227386474609375,
|
| 92 |
-
"train_l0": 7.045191526412964,
|
| 93 |
-
"val_l0": 5.382688045501709
|
| 94 |
-
},
|
| 95 |
-
{
|
| 96 |
-
"step": 2800,
|
| 97 |
-
"train_loss": 2.1010913848876953,
|
| 98 |
-
"val_loss": 1.9941879272460938,
|
| 99 |
-
"train_l0": 4.841649532318115,
|
| 100 |
-
"val_l0": 4.034926891326904
|
| 101 |
-
},
|
| 102 |
-
{
|
| 103 |
-
"step": 3000,
|
| 104 |
-
"train_loss": 0.8153737783432007,
|
| 105 |
-
"val_loss": 2.0456832885742187,
|
| 106 |
-
"train_l0": 4.412078857421875,
|
| 107 |
-
"val_l0": 4.26976203918457
|
| 108 |
-
},
|
| 109 |
-
{
|
| 110 |
-
"step": 3200,
|
| 111 |
-
"train_loss": 1.7816202640533447,
|
| 112 |
-
"val_loss": 1.3784133911132812,
|
| 113 |
-
"train_l0": 3.1894147396087646,
|
| 114 |
-
"val_l0": 6.235237121582031
|
| 115 |
-
},
|
| 116 |
-
{
|
| 117 |
-
"step": 3400,
|
| 118 |
-
"train_loss": 0.07173456251621246,
|
| 119 |
-
"val_loss": 2.302900695800781,
|
| 120 |
-
"train_l0": 8.204871416091919,
|
| 121 |
-
"val_l0": 5.319571495056152
|
| 122 |
-
},
|
| 123 |
-
{
|
| 124 |
-
"step": 3600,
|
| 125 |
-
"train_loss": 0.8552208542823792,
|
| 126 |
-
"val_loss": 1.7994430541992188,
|
| 127 |
-
"train_l0": 4.5390307903289795,
|
| 128 |
-
"val_l0": 5.086700916290283
|
| 129 |
-
},
|
| 130 |
-
{
|
| 131 |
-
"step": 3800,
|
| 132 |
-
"train_loss": 0.381530225276947,
|
| 133 |
-
"val_loss": 1.0194320678710938,
|
| 134 |
-
"train_l0": 4.763072729110718,
|
| 135 |
-
"val_l0": 5.433690547943115
|
| 136 |
-
},
|
| 137 |
-
{
|
| 138 |
-
"step": 4000,
|
| 139 |
-
"train_loss": 1.466295599937439,
|
| 140 |
-
"val_loss": 1.1989639282226563,
|
| 141 |
-
"train_l0": 5.863094329833984,
|
| 142 |
-
"val_l0": 6.461508274078369
|
| 143 |
-
},
|
| 144 |
-
{
|
| 145 |
-
"step": 4200,
|
| 146 |
-
"train_loss": 2.5544533729553223,
|
| 147 |
-
"val_loss": 1.8347068786621095,
|
| 148 |
-
"train_l0": 6.980335712432861,
|
| 149 |
-
"val_l0": 5.383918285369873
|
| 150 |
-
},
|
| 151 |
-
{
|
| 152 |
-
"step": 4400,
|
| 153 |
-
"train_loss": 0.7380867004394531,
|
| 154 |
-
"val_loss": 0.8785980224609375,
|
| 155 |
-
"train_l0": 5.960804224014282,
|
| 156 |
-
"val_l0": 3.778097629547119
|
| 157 |
-
},
|
| 158 |
-
{
|
| 159 |
-
"step": 4600,
|
| 160 |
-
"train_loss": 1.5479521751403809,
|
| 161 |
-
"val_loss": 1.2941329956054688,
|
| 162 |
-
"train_l0": 4.40671443939209,
|
| 163 |
-
"val_l0": 4.870655536651611
|
| 164 |
-
},
|
| 165 |
-
{
|
| 166 |
-
"step": 4800,
|
| 167 |
-
"train_loss": 0.5090218782424927,
|
| 168 |
-
"val_loss": 2.5589385986328126,
|
| 169 |
-
"train_l0": 5.473923683166504,
|
| 170 |
-
"val_l0": 5.626881122589111
|
| 171 |
-
},
|
| 172 |
-
{
|
| 173 |
-
"step": 5000,
|
| 174 |
-
"train_loss": 1.7641514539718628,
|
| 175 |
-
"val_loss": 1.7287353515625,
|
| 176 |
-
"train_l0": 4.126083850860596,
|
| 177 |
-
"val_l0": 5.8902812004089355
|
| 178 |
-
}
|
| 179 |
-
],
|
| 180 |
-
"summary": {
|
| 181 |
-
"dead_features": 0,
|
| 182 |
-
"dead_pct": 0.0,
|
| 183 |
-
"final_l0_pct": 4.126083850860596
|
| 184 |
-
}
|
| 185 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
metrics/metrics_L20.json
DELETED
|
@@ -1,185 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"layer": 20,
|
| 3 |
-
"metrics": [
|
| 4 |
-
{
|
| 5 |
-
"step": 200,
|
| 6 |
-
"train_loss": 0.14699426293373108,
|
| 7 |
-
"val_loss": 0.13165283203125,
|
| 8 |
-
"train_l0": 10.582691431045532,
|
| 9 |
-
"val_l0": 10.451538562774658
|
| 10 |
-
},
|
| 11 |
-
{
|
| 12 |
-
"step": 400,
|
| 13 |
-
"train_loss": 0.23101815581321716,
|
| 14 |
-
"val_loss": 0.28314056396484377,
|
| 15 |
-
"train_l0": 9.41057801246643,
|
| 16 |
-
"val_l0": 9.788892269134521
|
| 17 |
-
},
|
| 18 |
-
{
|
| 19 |
-
"step": 600,
|
| 20 |
-
"train_loss": 0.08707526326179504,
|
| 21 |
-
"val_loss": 0.27347412109375,
|
| 22 |
-
"train_l0": 9.794217348098755,
|
| 23 |
-
"val_l0": 10.063333511352539
|
| 24 |
-
},
|
| 25 |
-
{
|
| 26 |
-
"step": 800,
|
| 27 |
-
"train_loss": 0.3588435649871826,
|
| 28 |
-
"val_loss": 0.2654388427734375,
|
| 29 |
-
"train_l0": 9.649884700775146,
|
| 30 |
-
"val_l0": 9.44399356842041
|
| 31 |
-
},
|
| 32 |
-
{
|
| 33 |
-
"step": 1000,
|
| 34 |
-
"train_loss": 0.05659712851047516,
|
| 35 |
-
"val_loss": 0.28250961303710936,
|
| 36 |
-
"train_l0": 9.239715337753296,
|
| 37 |
-
"val_l0": 9.546241760253906
|
| 38 |
-
},
|
| 39 |
-
{
|
| 40 |
-
"step": 1200,
|
| 41 |
-
"train_loss": 0.06258459389209747,
|
| 42 |
-
"val_loss": 0.28629226684570314,
|
| 43 |
-
"train_l0": 9.168636798858643,
|
| 44 |
-
"val_l0": 9.423484802246094
|
| 45 |
-
},
|
| 46 |
-
{
|
| 47 |
-
"step": 1400,
|
| 48 |
-
"train_loss": 0.19844704866409302,
|
| 49 |
-
"val_loss": 0.0340789794921875,
|
| 50 |
-
"train_l0": 9.07091498374939,
|
| 51 |
-
"val_l0": 8.48935604095459
|
| 52 |
-
},
|
| 53 |
-
{
|
| 54 |
-
"step": 1600,
|
| 55 |
-
"train_loss": 0.08194287866353989,
|
| 56 |
-
"val_loss": 0.312615966796875,
|
| 57 |
-
"train_l0": 8.271408081054688,
|
| 58 |
-
"val_l0": 8.831908702850342
|
| 59 |
-
},
|
| 60 |
-
{
|
| 61 |
-
"step": 1800,
|
| 62 |
-
"train_loss": 0.10043966770172119,
|
| 63 |
-
"val_loss": 0.2164337158203125,
|
| 64 |
-
"train_l0": 7.838207483291626,
|
| 65 |
-
"val_l0": 8.3097243309021
|
| 66 |
-
},
|
| 67 |
-
{
|
| 68 |
-
"step": 2000,
|
| 69 |
-
"train_loss": 0.09914621710777283,
|
| 70 |
-
"val_loss": 0.12376022338867188,
|
| 71 |
-
"train_l0": 7.564306259155273,
|
| 72 |
-
"val_l0": 8.117237091064453
|
| 73 |
-
},
|
| 74 |
-
{
|
| 75 |
-
"step": 2200,
|
| 76 |
-
"train_loss": 0.0815201923251152,
|
| 77 |
-
"val_loss": 0.10303955078125,
|
| 78 |
-
"train_l0": 7.491922378540039,
|
| 79 |
-
"val_l0": 7.953670024871826
|
| 80 |
-
},
|
| 81 |
-
{
|
| 82 |
-
"step": 2400,
|
| 83 |
-
"train_loss": 0.1623128354549408,
|
| 84 |
-
"val_loss": 0.08680648803710937,
|
| 85 |
-
"train_l0": 9.06481146812439,
|
| 86 |
-
"val_l0": 7.502641677856445
|
| 87 |
-
},
|
| 88 |
-
{
|
| 89 |
-
"step": 2600,
|
| 90 |
-
"train_loss": 0.05493927001953125,
|
| 91 |
-
"val_loss": 0.116326904296875,
|
| 92 |
-
"train_l0": 7.673841714859009,
|
| 93 |
-
"val_l0": 7.588622570037842
|
| 94 |
-
},
|
| 95 |
-
{
|
| 96 |
-
"step": 2800,
|
| 97 |
-
"train_loss": 0.18300625681877136,
|
| 98 |
-
"val_loss": 0.20911483764648436,
|
| 99 |
-
"train_l0": 8.141428232192993,
|
| 100 |
-
"val_l0": 8.038487434387207
|
| 101 |
-
},
|
| 102 |
-
{
|
| 103 |
-
"step": 3000,
|
| 104 |
-
"train_loss": 0.33356937766075134,
|
| 105 |
-
"val_loss": 0.13643341064453124,
|
| 106 |
-
"train_l0": 8.726048469543457,
|
| 107 |
-
"val_l0": 8.04246187210083
|
| 108 |
-
},
|
| 109 |
-
{
|
| 110 |
-
"step": 3200,
|
| 111 |
-
"train_loss": 0.1465834528207779,
|
| 112 |
-
"val_loss": 0.08868255615234374,
|
| 113 |
-
"train_l0": 7.541239261627197,
|
| 114 |
-
"val_l0": 7.17219352722168
|
| 115 |
-
},
|
| 116 |
-
{
|
| 117 |
-
"step": 3400,
|
| 118 |
-
"train_loss": 0.1949060708284378,
|
| 119 |
-
"val_loss": 0.06849746704101563,
|
| 120 |
-
"train_l0": 8.290153741836548,
|
| 121 |
-
"val_l0": 7.0745158195495605
|
| 122 |
-
},
|
| 123 |
-
{
|
| 124 |
-
"step": 3600,
|
| 125 |
-
"train_loss": 0.05165141075849533,
|
| 126 |
-
"val_loss": 0.07282562255859375,
|
| 127 |
-
"train_l0": 7.266104221343994,
|
| 128 |
-
"val_l0": 7.104558944702148
|
| 129 |
-
},
|
| 130 |
-
{
|
| 131 |
-
"step": 3800,
|
| 132 |
-
"train_loss": 0.15942546725273132,
|
| 133 |
-
"val_loss": 0.15430068969726562,
|
| 134 |
-
"train_l0": 8.259528875350952,
|
| 135 |
-
"val_l0": 7.801253795623779
|
| 136 |
-
},
|
| 137 |
-
{
|
| 138 |
-
"step": 4000,
|
| 139 |
-
"train_loss": 0.13976645469665527,
|
| 140 |
-
"val_loss": 0.15994873046875,
|
| 141 |
-
"train_l0": 7.7893078327178955,
|
| 142 |
-
"val_l0": 7.584846019744873
|
| 143 |
-
},
|
| 144 |
-
{
|
| 145 |
-
"step": 4200,
|
| 146 |
-
"train_loss": 0.15116536617279053,
|
| 147 |
-
"val_loss": 0.08576812744140624,
|
| 148 |
-
"train_l0": 8.25345516204834,
|
| 149 |
-
"val_l0": 6.804840564727783
|
| 150 |
-
},
|
| 151 |
-
{
|
| 152 |
-
"step": 4400,
|
| 153 |
-
"train_loss": 0.11699505150318146,
|
| 154 |
-
"val_loss": 0.087786865234375,
|
| 155 |
-
"train_l0": 7.164949178695679,
|
| 156 |
-
"val_l0": 7.242691516876221
|
| 157 |
-
},
|
| 158 |
-
{
|
| 159 |
-
"step": 4600,
|
| 160 |
-
"train_loss": 0.10113253444433212,
|
| 161 |
-
"val_loss": 0.14269332885742186,
|
| 162 |
-
"train_l0": 6.981378793716431,
|
| 163 |
-
"val_l0": 7.064957618713379
|
| 164 |
-
},
|
| 165 |
-
{
|
| 166 |
-
"step": 4800,
|
| 167 |
-
"train_loss": 0.05499845743179321,
|
| 168 |
-
"val_loss": 0.11155853271484376,
|
| 169 |
-
"train_l0": 6.736016273498535,
|
| 170 |
-
"val_l0": 7.270493507385254
|
| 171 |
-
},
|
| 172 |
-
{
|
| 173 |
-
"step": 5000,
|
| 174 |
-
"train_loss": 0.11084996908903122,
|
| 175 |
-
"val_loss": 0.05323944091796875,
|
| 176 |
-
"train_l0": 7.761210203170776,
|
| 177 |
-
"val_l0": 6.732525825500488
|
| 178 |
-
}
|
| 179 |
-
],
|
| 180 |
-
"summary": {
|
| 181 |
-
"dead_features": 0,
|
| 182 |
-
"dead_pct": 0.0,
|
| 183 |
-
"final_l0_pct": 7.761210203170776
|
| 184 |
-
}
|
| 185 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
metrics/metrics_L21.json
DELETED
|
@@ -1,185 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"layer": 21,
|
| 3 |
-
"metrics": [
|
| 4 |
-
{
|
| 5 |
-
"step": 200,
|
| 6 |
-
"train_loss": 0.01332792267203331,
|
| 7 |
-
"val_loss": 0.277655029296875,
|
| 8 |
-
"train_l0": 10.608041286468506,
|
| 9 |
-
"val_l0": 10.762674808502197
|
| 10 |
-
},
|
| 11 |
-
{
|
| 12 |
-
"step": 400,
|
| 13 |
-
"train_loss": 0.2547125518321991,
|
| 14 |
-
"val_loss": 0.3917236328125,
|
| 15 |
-
"train_l0": 10.508888959884644,
|
| 16 |
-
"val_l0": 10.694403648376465
|
| 17 |
-
},
|
| 18 |
-
{
|
| 19 |
-
"step": 600,
|
| 20 |
-
"train_loss": 0.1171446219086647,
|
| 21 |
-
"val_loss": 0.12552947998046876,
|
| 22 |
-
"train_l0": 9.497666358947754,
|
| 23 |
-
"val_l0": 9.637413024902344
|
| 24 |
-
},
|
| 25 |
-
{
|
| 26 |
-
"step": 800,
|
| 27 |
-
"train_loss": 0.09298652410507202,
|
| 28 |
-
"val_loss": 0.087249755859375,
|
| 29 |
-
"train_l0": 8.23696255683899,
|
| 30 |
-
"val_l0": 8.367600440979004
|
| 31 |
-
},
|
| 32 |
-
{
|
| 33 |
-
"step": 1000,
|
| 34 |
-
"train_loss": 0.20712248980998993,
|
| 35 |
-
"val_loss": 0.20865402221679688,
|
| 36 |
-
"train_l0": 8.668595552444458,
|
| 37 |
-
"val_l0": 8.566527366638184
|
| 38 |
-
},
|
| 39 |
-
{
|
| 40 |
-
"step": 1200,
|
| 41 |
-
"train_loss": 0.20033439993858337,
|
| 42 |
-
"val_loss": 0.1664215087890625,
|
| 43 |
-
"train_l0": 7.781904935836792,
|
| 44 |
-
"val_l0": 7.83083438873291
|
| 45 |
-
},
|
| 46 |
-
{
|
| 47 |
-
"step": 1400,
|
| 48 |
-
"train_loss": 0.25378119945526123,
|
| 49 |
-
"val_loss": 0.13873748779296874,
|
| 50 |
-
"train_l0": 7.891845703125,
|
| 51 |
-
"val_l0": 7.767608165740967
|
| 52 |
-
},
|
| 53 |
-
{
|
| 54 |
-
"step": 1600,
|
| 55 |
-
"train_loss": 0.2043895572423935,
|
| 56 |
-
"val_loss": 0.09123306274414063,
|
| 57 |
-
"train_l0": 8.173108100891113,
|
| 58 |
-
"val_l0": 7.316091060638428
|
| 59 |
-
},
|
| 60 |
-
{
|
| 61 |
-
"step": 1800,
|
| 62 |
-
"train_loss": 0.2300933599472046,
|
| 63 |
-
"val_loss": 0.074798583984375,
|
| 64 |
-
"train_l0": 7.918494939804077,
|
| 65 |
-
"val_l0": 7.5612711906433105
|
| 66 |
-
},
|
| 67 |
-
{
|
| 68 |
-
"step": 2000,
|
| 69 |
-
"train_loss": 0.16990616917610168,
|
| 70 |
-
"val_loss": 0.17627334594726562,
|
| 71 |
-
"train_l0": 7.038271427154541,
|
| 72 |
-
"val_l0": 6.913094520568848
|
| 73 |
-
},
|
| 74 |
-
{
|
| 75 |
-
"step": 2200,
|
| 76 |
-
"train_loss": 0.3137889802455902,
|
| 77 |
-
"val_loss": 0.14666900634765626,
|
| 78 |
-
"train_l0": 7.959175109863281,
|
| 79 |
-
"val_l0": 7.098090648651123
|
| 80 |
-
},
|
| 81 |
-
{
|
| 82 |
-
"step": 2400,
|
| 83 |
-
"train_loss": 0.08380964398384094,
|
| 84 |
-
"val_loss": 0.18900527954101562,
|
| 85 |
-
"train_l0": 6.956285238265991,
|
| 86 |
-
"val_l0": 7.64517068862915
|
| 87 |
-
},
|
| 88 |
-
{
|
| 89 |
-
"step": 2600,
|
| 90 |
-
"train_loss": 0.06633605808019638,
|
| 91 |
-
"val_loss": 0.14645919799804688,
|
| 92 |
-
"train_l0": 5.499094724655151,
|
| 93 |
-
"val_l0": 6.773960590362549
|
| 94 |
-
},
|
| 95 |
-
{
|
| 96 |
-
"step": 2800,
|
| 97 |
-
"train_loss": 0.13743820786476135,
|
| 98 |
-
"val_loss": 0.17334442138671874,
|
| 99 |
-
"train_l0": 7.4706971645355225,
|
| 100 |
-
"val_l0": 7.685177326202393
|
| 101 |
-
},
|
| 102 |
-
{
|
| 103 |
-
"step": 3000,
|
| 104 |
-
"train_loss": 0.13275736570358276,
|
| 105 |
-
"val_loss": 0.09549102783203126,
|
| 106 |
-
"train_l0": 8.150416612625122,
|
| 107 |
-
"val_l0": 6.291041374206543
|
| 108 |
-
},
|
| 109 |
-
{
|
| 110 |
-
"step": 3200,
|
| 111 |
-
"train_loss": 0.07135528326034546,
|
| 112 |
-
"val_loss": 0.12577743530273439,
|
| 113 |
-
"train_l0": 7.34713077545166,
|
| 114 |
-
"val_l0": 7.054803371429443
|
| 115 |
-
},
|
| 116 |
-
{
|
| 117 |
-
"step": 3400,
|
| 118 |
-
"train_loss": 0.11701130121946335,
|
| 119 |
-
"val_loss": 0.051152801513671874,
|
| 120 |
-
"train_l0": 7.402437925338745,
|
| 121 |
-
"val_l0": 6.671254634857178
|
| 122 |
-
},
|
| 123 |
-
{
|
| 124 |
-
"step": 3600,
|
| 125 |
-
"train_loss": 0.15087389945983887,
|
| 126 |
-
"val_loss": 0.1098663330078125,
|
| 127 |
-
"train_l0": 6.02908730506897,
|
| 128 |
-
"val_l0": 5.872364044189453
|
| 129 |
-
},
|
| 130 |
-
{
|
| 131 |
-
"step": 3800,
|
| 132 |
-
"train_loss": 0.1514844447374344,
|
| 133 |
-
"val_loss": 0.0766357421875,
|
| 134 |
-
"train_l0": 6.948155164718628,
|
| 135 |
-
"val_l0": 6.3866496086120605
|
| 136 |
-
},
|
| 137 |
-
{
|
| 138 |
-
"step": 4000,
|
| 139 |
-
"train_loss": 0.09939510375261307,
|
| 140 |
-
"val_loss": 0.07453231811523438,
|
| 141 |
-
"train_l0": 6.65438175201416,
|
| 142 |
-
"val_l0": 6.505246162414551
|
| 143 |
-
},
|
| 144 |
-
{
|
| 145 |
-
"step": 4200,
|
| 146 |
-
"train_loss": 0.23903252184391022,
|
| 147 |
-
"val_loss": 0.09680633544921875,
|
| 148 |
-
"train_l0": 7.959979772567749,
|
| 149 |
-
"val_l0": 6.511647701263428
|
| 150 |
-
},
|
| 151 |
-
{
|
| 152 |
-
"step": 4400,
|
| 153 |
-
"train_loss": 0.21320170164108276,
|
| 154 |
-
"val_loss": 0.07060699462890625,
|
| 155 |
-
"train_l0": 7.776623964309692,
|
| 156 |
-
"val_l0": 6.2432026863098145
|
| 157 |
-
},
|
| 158 |
-
{
|
| 159 |
-
"step": 4600,
|
| 160 |
-
"train_loss": 0.16217657923698425,
|
| 161 |
-
"val_loss": 0.06054611206054687,
|
| 162 |
-
"train_l0": 7.350420951843262,
|
| 163 |
-
"val_l0": 6.335480213165283
|
| 164 |
-
},
|
| 165 |
-
{
|
| 166 |
-
"step": 4800,
|
| 167 |
-
"train_loss": 0.04712393879890442,
|
| 168 |
-
"val_loss": 0.09563217163085938,
|
| 169 |
-
"train_l0": 5.406129360198975,
|
| 170 |
-
"val_l0": 6.729567050933838
|
| 171 |
-
},
|
| 172 |
-
{
|
| 173 |
-
"step": 5000,
|
| 174 |
-
"train_loss": 0.04011683166027069,
|
| 175 |
-
"val_loss": 0.11249771118164062,
|
| 176 |
-
"train_l0": 5.306851863861084,
|
| 177 |
-
"val_l0": 6.852078437805176
|
| 178 |
-
}
|
| 179 |
-
],
|
| 180 |
-
"summary": {
|
| 181 |
-
"dead_features": 0,
|
| 182 |
-
"dead_pct": 0.0,
|
| 183 |
-
"final_l0_pct": 5.306851863861084
|
| 184 |
-
}
|
| 185 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
metrics/metrics_L22.json
DELETED
|
@@ -1,185 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"layer": 22,
|
| 3 |
-
"metrics": [
|
| 4 |
-
{
|
| 5 |
-
"step": 200,
|
| 6 |
-
"train_loss": 0.3163594603538513,
|
| 7 |
-
"val_loss": 0.48409576416015626,
|
| 8 |
-
"train_l0": 11.616826057434082,
|
| 9 |
-
"val_l0": 11.526691913604736
|
| 10 |
-
},
|
| 11 |
-
{
|
| 12 |
-
"step": 400,
|
| 13 |
-
"train_loss": 0.011894471012055874,
|
| 14 |
-
"val_loss": 0.16822509765625,
|
| 15 |
-
"train_l0": 10.721015930175781,
|
| 16 |
-
"val_l0": 11.04982852935791
|
| 17 |
-
},
|
| 18 |
-
{
|
| 19 |
-
"step": 600,
|
| 20 |
-
"train_loss": 0.19044002890586853,
|
| 21 |
-
"val_loss": 0.16395263671875,
|
| 22 |
-
"train_l0": 10.421913862228394,
|
| 23 |
-
"val_l0": 10.223636627197266
|
| 24 |
-
},
|
| 25 |
-
{
|
| 26 |
-
"step": 800,
|
| 27 |
-
"train_loss": 0.14313282072544098,
|
| 28 |
-
"val_loss": 0.2520759582519531,
|
| 29 |
-
"train_l0": 10.149532556533813,
|
| 30 |
-
"val_l0": 9.954025745391846
|
| 31 |
-
},
|
| 32 |
-
{
|
| 33 |
-
"step": 1000,
|
| 34 |
-
"train_loss": 0.16893978416919708,
|
| 35 |
-
"val_loss": 0.11721267700195312,
|
| 36 |
-
"train_l0": 9.874510765075684,
|
| 37 |
-
"val_l0": 10.028002262115479
|
| 38 |
-
},
|
| 39 |
-
{
|
| 40 |
-
"step": 1200,
|
| 41 |
-
"train_loss": 0.1368771493434906,
|
| 42 |
-
"val_loss": 0.16338958740234374,
|
| 43 |
-
"train_l0": 9.435838460922241,
|
| 44 |
-
"val_l0": 9.568281173706055
|
| 45 |
-
},
|
| 46 |
-
{
|
| 47 |
-
"step": 1400,
|
| 48 |
-
"train_loss": 0.19832155108451843,
|
| 49 |
-
"val_loss": 0.10985565185546875,
|
| 50 |
-
"train_l0": 9.0861976146698,
|
| 51 |
-
"val_l0": 8.976943492889404
|
| 52 |
-
},
|
| 53 |
-
{
|
| 54 |
-
"step": 1600,
|
| 55 |
-
"train_loss": 0.18035824596881866,
|
| 56 |
-
"val_loss": 0.12503204345703126,
|
| 57 |
-
"train_l0": 9.632402658462524,
|
| 58 |
-
"val_l0": 9.286336898803711
|
| 59 |
-
},
|
| 60 |
-
{
|
| 61 |
-
"step": 1800,
|
| 62 |
-
"train_loss": 0.27429118752479553,
|
| 63 |
-
"val_loss": 0.035085296630859374,
|
| 64 |
-
"train_l0": 9.70383882522583,
|
| 65 |
-
"val_l0": 9.053497314453125
|
| 66 |
-
},
|
| 67 |
-
{
|
| 68 |
-
"step": 2000,
|
| 69 |
-
"train_loss": 0.21512633562088013,
|
| 70 |
-
"val_loss": 0.13494415283203126,
|
| 71 |
-
"train_l0": 8.99004340171814,
|
| 72 |
-
"val_l0": 8.971784114837646
|
| 73 |
-
},
|
| 74 |
-
{
|
| 75 |
-
"step": 2200,
|
| 76 |
-
"train_loss": 0.14793342351913452,
|
| 77 |
-
"val_loss": 0.12615814208984374,
|
| 78 |
-
"train_l0": 9.398806095123291,
|
| 79 |
-
"val_l0": 9.186358451843262
|
| 80 |
-
},
|
| 81 |
-
{
|
| 82 |
-
"step": 2400,
|
| 83 |
-
"train_loss": 0.14437006413936615,
|
| 84 |
-
"val_loss": 0.047509765625,
|
| 85 |
-
"train_l0": 9.571892023086548,
|
| 86 |
-
"val_l0": 8.61109733581543
|
| 87 |
-
},
|
| 88 |
-
{
|
| 89 |
-
"step": 2600,
|
| 90 |
-
"train_loss": 0.13719600439071655,
|
| 91 |
-
"val_loss": 0.100848388671875,
|
| 92 |
-
"train_l0": 9.457260370254517,
|
| 93 |
-
"val_l0": 8.848090171813965
|
| 94 |
-
},
|
| 95 |
-
{
|
| 96 |
-
"step": 2800,
|
| 97 |
-
"train_loss": 0.04593579098582268,
|
| 98 |
-
"val_loss": 0.07024383544921875,
|
| 99 |
-
"train_l0": 8.39005708694458,
|
| 100 |
-
"val_l0": 9.266834259033203
|
| 101 |
-
},
|
| 102 |
-
{
|
| 103 |
-
"step": 3000,
|
| 104 |
-
"train_loss": 0.07460154592990875,
|
| 105 |
-
"val_loss": 0.064019775390625,
|
| 106 |
-
"train_l0": 7.641005516052246,
|
| 107 |
-
"val_l0": 7.518014907836914
|
| 108 |
-
},
|
| 109 |
-
{
|
| 110 |
-
"step": 3200,
|
| 111 |
-
"train_loss": 0.0363149531185627,
|
| 112 |
-
"val_loss": 0.017144775390625,
|
| 113 |
-
"train_l0": 7.548820972442627,
|
| 114 |
-
"val_l0": 7.40659236907959
|
| 115 |
-
},
|
| 116 |
-
{
|
| 117 |
-
"step": 3400,
|
| 118 |
-
"train_loss": 0.0899328663945198,
|
| 119 |
-
"val_loss": 0.06250228881835937,
|
| 120 |
-
"train_l0": 8.33696722984314,
|
| 121 |
-
"val_l0": 8.085470199584961
|
| 122 |
-
},
|
| 123 |
-
{
|
| 124 |
-
"step": 3600,
|
| 125 |
-
"train_loss": 0.03610712289810181,
|
| 126 |
-
"val_loss": 0.02095489501953125,
|
| 127 |
-
"train_l0": 8.072781562805176,
|
| 128 |
-
"val_l0": 7.350597381591797
|
| 129 |
-
},
|
| 130 |
-
{
|
| 131 |
-
"step": 3800,
|
| 132 |
-
"train_loss": 0.04234137758612633,
|
| 133 |
-
"val_loss": 0.06560287475585938,
|
| 134 |
-
"train_l0": 7.59042501449585,
|
| 135 |
-
"val_l0": 7.564525604248047
|
| 136 |
-
},
|
| 137 |
-
{
|
| 138 |
-
"step": 4000,
|
| 139 |
-
"train_loss": 0.022174539044499397,
|
| 140 |
-
"val_loss": 0.05243072509765625,
|
| 141 |
-
"train_l0": 8.130782842636108,
|
| 142 |
-
"val_l0": 8.100905418395996
|
| 143 |
-
},
|
| 144 |
-
{
|
| 145 |
-
"step": 4200,
|
| 146 |
-
"train_loss": 0.04937244951725006,
|
| 147 |
-
"val_loss": 0.062067413330078126,
|
| 148 |
-
"train_l0": 8.077806234359741,
|
| 149 |
-
"val_l0": 8.028426170349121
|
| 150 |
-
},
|
| 151 |
-
{
|
| 152 |
-
"step": 4400,
|
| 153 |
-
"train_loss": 0.07710352540016174,
|
| 154 |
-
"val_loss": 0.04609222412109375,
|
| 155 |
-
"train_l0": 7.644528150558472,
|
| 156 |
-
"val_l0": 8.020617961883545
|
| 157 |
-
},
|
| 158 |
-
{
|
| 159 |
-
"step": 4600,
|
| 160 |
-
"train_loss": 0.07687506824731827,
|
| 161 |
-
"val_loss": 0.10608062744140626,
|
| 162 |
-
"train_l0": 9.28276777267456,
|
| 163 |
-
"val_l0": 8.763034343719482
|
| 164 |
-
},
|
| 165 |
-
{
|
| 166 |
-
"step": 4800,
|
| 167 |
-
"train_loss": 0.07844512909650803,
|
| 168 |
-
"val_loss": 0.06020050048828125,
|
| 169 |
-
"train_l0": 8.65015983581543,
|
| 170 |
-
"val_l0": 7.823216915130615
|
| 171 |
-
},
|
| 172 |
-
{
|
| 173 |
-
"step": 5000,
|
| 174 |
-
"train_loss": 0.061685673892498016,
|
| 175 |
-
"val_loss": 0.12248992919921875,
|
| 176 |
-
"train_l0": 8.422619104385376,
|
| 177 |
-
"val_l0": 8.659741878509521
|
| 178 |
-
}
|
| 179 |
-
],
|
| 180 |
-
"summary": {
|
| 181 |
-
"dead_features": 0,
|
| 182 |
-
"dead_pct": 0.0,
|
| 183 |
-
"final_l0_pct": 8.422619104385376
|
| 184 |
-
}
|
| 185 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|