# maximuspowers: "Upload model, config, and documentation"
# Commit 68a7a5c (verified)
# Batch-loading options.
# The children were flattened to column 0, which made `dataloader` a null key;
# they are nested here so the section is a mapping again.
# NOTE(review): presumably forwarded to the framework's data loader — confirm.
dataloader:
  num_workers: 0
  pin_memory: true
# Dataset source, input representation, label set, and split ratios.
# NOTE(review): nesting was reconstructed from the alphabetical key order of
# the flattened dump. `neuron_profile`, `patterns`, `random_seed`, and the
# `*_split` keys are placed directly under `dataset`; key order alone would
# also allow deeper nesting — confirm against the loader's schema.
dataset:
  cache_dir: .cache/classifier_data
  hf_dataset: maximuspowers/muat-mean-std
  input_mode: signature
  # Upper bounds on the inputs the classifier is sized for.
  max_dimensions:
    max_layers: 13
    max_neurons_per_layer: 8
    max_sequence_length: 5
  # Per-neuron profiling methods; each method takes an (empty) options mapping.
  neuron_profile:
    methods:
      mean: {}
      std: {}
  # Label names. There are 14 entries, matching `num_patterns: 14` in the
  # model section of this file.
  patterns:
  - palindrome
  - sorted_ascending
  - sorted_descending
  - alternating
  - contains_abc
  - starts_with
  - ends_with
  - no_repeats
  - has_majority
  - increasing_pairs
  - decreasing_pairs
  - vowel_consonant
  - first_last_match
  - mountain_pattern
  random_seed: 42
  # The three split fractions sum to 1.0 (0.8 + 0.1 + 0.1).
  test_split: 0.1
  train_split: 0.8
  val_split: 0.1
# Compute device selection.
# `type` is nested under `device` so it no longer collides with the
# scheduler's `type` key elsewhere in this file.
# NOTE(review): `auto` presumably lets the consumer pick the device — confirm.
device:
  type: auto
# Evaluation settings: decision threshold and the metrics to report.
# The children were flattened to column 0; they are nested here so that
# `evaluation` is a mapping rather than a null key.
evaluation:
  # NOTE(review): presumably the cut-off applied to per-pattern probabilities
  # when deriving binary predictions — confirm with the evaluator.
  decision_threshold: 0.5
  metrics:
  - accuracy_exact_match
  - accuracy_hamming
  - precision_macro
  - recall_macro
  - f1_macro
  - f1_micro
  per_pattern_metrics: true
# Model-hub publishing options.
# The children were flattened to column 0; nesting them keeps `enabled` from
# colliding with the other `enabled` keys in this file.
hub:
  enabled: true
  private: false
  push_frequency: epoch
  push_logs: true
  push_metrics: true
  push_model: true
  repo_id: maximuspowers/muat-mean-std-classifier
  # NOTE(review): credential field in a config that is pushed to a repo with
  # `private: false`. The value is redacted here; prefer supplying the token
  # from the environment or a secret store instead of this file — confirm the
  # loader supports that before removing the key.
  token: <REDACTED>
# Checkpointing and TensorBoard logging.
# The sub-mappings are restored so `enabled`, `mode`, and `monitor` are no
# longer duplicate top-level keys.
logging:
  checkpoint:
    enabled: true
    # NOTE(review): checkpoint selection tracks `val_f1_macro` (max), while
    # early stopping in the training section tracks `val_loss` (min) —
    # confirm the divergence is intended.
    mode: max
    monitor: val_f1_macro
    save_best_only: true
    save_dir: ./checkpoints/classifier_mean_std
  tensorboard:
    enabled: true
    log_dir: ./runs/classifier_mean_std
    # Under the dump's alphabetical order this key can only belong to
    # `tensorboard` (it follows `log_dir`, not `checkpoint`).
    log_interval: 10
  # NOTE(review): placed under `logging`; key order alone would also allow it
  # under `tensorboard` — confirm against the loader's schema.
  verbose: true
# Network architecture: encoders, fusion head, and output size.
# The sub-mappings are restored so each component keeps its own `activation`,
# `dropout`, and `hidden_dims` instead of overwriting one another as duplicate
# top-level keys.
# NOTE(review): nesting was reconstructed from the alphabetical key order of
# the flattened dump — confirm against the model builder's schema.
model:
  fusion:
    activation: relu
    dropout: 0.2
    hidden_dims:
    - 128
    - 64
  output:
    # Equals the number of entries in the `patterns` list in this file (14).
    num_patterns: 14
  signature_encoder:
    activation: relu
    dropout: 0.2
    hidden_dims:
    - 512
    - 256
    - 256
    - 128
    # NOTE(review): placed under `signature_encoder`; key order alone would
    # also allow it directly under `model` — confirm.
    use_batch_norm: true
  # NOTE(review): no `hidden_dims` here, unlike the other components —
  # presumably unused while `input_mode` is `signature`; confirm.
  weight_encoder:
    activation: relu
    dropout: 0.2
# Optimisation hyper-parameters, early stopping, and LR scheduling.
# The sub-mappings are restored so the two `enabled` / `patience` pairs (and
# `mode` / `monitor`) no longer collide as duplicate top-level keys.
# NOTE(review): scalars following each sub-mapping (`epochs`, `optimizer`,
# `pos_weight`, `weight_decay`, ...) are placed directly under `training`;
# key order alone would also allow deeper nesting — confirm against the
# trainer's schema.
training:
  batch_size: 16
  early_stopping:
    enabled: true
    mode: min
    monitor: val_loss
    patience: 50
  epochs: 1000
  learning_rate: 0.001
  loss: bce_with_logits
  lr_scheduler:
    enabled: true
    factor: 0.5
    # Keep the explicit decimal point: some YAML 1.1 loaders read a bare
    # `1e-05` as a string rather than a float.
    min_lr: 1.0e-05
    # Scheduler patience (20) is shorter than early-stopping patience (50).
    patience: 20
    type: reduce_on_plateau
  optimizer: adam
  # Explicit null: no value is set for positive-class weighting.
  pos_weight: null
  weight_decay: 0.0001