# Dataset sources for this run; a single entry is configured.
datasets:
  - name: chatalpaca_multiturn_enriched
    repo_id: BRlkl/chatalpaca-multiturn-enriched
    source_split: train
    format: messages_all_turns
    # Only a `train` source split is listed, so the validation set is
    # presumably carved out of it using this ratio and seed -- TODO confirm.
    validation_ratio: 0.02
    # Same value as training.seed (17).
    split_seed: 17
    # NOTE(review): turn/character limits; whether out-of-range conversations
    # are dropped or truncated is not visible in this file.
    min_turns: 2
    max_turns: 6
    max_message_chars: 6000
    # inference.use_base_chat_template is also set to true in this file.
    use_base_chat_template: true

# Model selection, numeric/runtime options and architecture settings.
model:
  base_model_name: google/t5gemma-l-l-prefixlm-it
  # NOTE(review): looks like a Hub repo id rather than a local filesystem
  # path -- confirm the loader accepts both forms.
  initial_model_path: BRlkl/test_1024
  # training.mixed_precision is set to bf16 in this file.
  dtype: bfloat16
  attn_implementation: sdpa
  disable_cudnn_sdp: true
  disable_mha_fastpath: true
  magicnorm_eps: 1.0e-6
  # z_slots, max_observation_tokens and max_decoder_tokens are all 1024 here.
  # NOTE(review): confirm whether these values are required to match.
  z_slots: 1024
  num_time_tokens: 0
  use_explicit_time_features: false
  gate_attention_heads: 4
  max_observation_tokens: 1024
  max_decoder_tokens: 1024
  thought_loop_proposal_mode: observation_hidden_compression
  preserve_observation_encoder_manifold: true
  observation_encoder_use_state_context: true
  latent_attention_mask_mode: full
  hard_state_replace: true

# Optimizer, scheduling-cadence and dataset-cap settings for training.
training:
  # Same value as datasets[0].split_seed (17).
  seed: 17
  num_workers: 2
  gradient_checkpointing: true
  # model.dtype is set to bfloat16 in this file.
  mixed_precision: bf16
  max_grad_norm: 1.0
  weight_decay: 0.01
  # Two learning rates are configured; the new-module rate is 20x the
  # backbone rate. NOTE(review): which parameters fall in each group is
  # decided by the training code, not by this file.
  backbone_learning_rate: 5.0e-6
  new_module_learning_rate: 1.0e-4
  adam_beta1: 0.9
  adam_beta2: 0.95
  adam_epsilon: 1.0e-8
  fused_adamw: true
  freeze_gate_head: true
  assistant_feedback_mode: teacher_forced
  # Cadences, in steps.
  log_every_steps: 1
  eval_every_steps: 100
  checkpoint_every_steps: 500
  eval_max_batches: 16
  validation_behavior_max_batches: 4
  # Explicit null instead of a bare empty value: both parse to null, but the
  # explicit form cannot be mistaken for an unfinished edit.
  # NOTE(review): null presumably means "no cap" -- confirm in the loader.
  max_train_examples: null
  max_validation_examples: null

  # Three loss weights of 0.33 each (they sum to 0.99, not 1.0).
  # NOTE(review): confirm whether the trainer normalizes the weights.
  response_loss_weight: 0.33
  current_user_reconstruction_loss_weight: 0.33
  probe_loss_weight: 0.33
  # Folded block scalar with strip chomping: the lines below are joined with
  # single spaces and no trailing newline, giving one single-line string.
  probe_question_text: >-
    What is everything we have talked about so far? Give exact conversation
    transcript verbatim in following format: [User 1]: X [Assistant 1]: Y
    [User 2]: A etc

  # Generation budgets: every *_max_new_tokens is 1024 and every
  # *_extra_new_tokens is 16.
  # NOTE(review): how the "extra" allowance combines with the max (added on
  # top, or margin over the reference length) is not visible here -- confirm.
  feedback_generation_max_new_tokens: 1024
  feedback_generation_extra_new_tokens: 16
  validation_response_max_new_tokens: 1024
  validation_response_extra_new_tokens: 16
  validation_probe_max_new_tokens: 1024
  validation_probe_extra_new_tokens: 16

  # Training metric keys, all under the `train/` prefix.
  # Key names use the `current_user_reconstruction` spelling (underscores
  # throughout), matching the loss key in this list and the
  # current_user_reconstruction_loss_weight setting.
  # NOTE(review): keys presumably must match the names emitted by the trainer
  # exactly -- confirm against the logging code.
  wandb_train_metric_keys:
    - train/loss_total
    - train/loss_response
    - train/loss_current_user_reconstruction
    - train/loss_probe
    - train/response_first_token_exact_match
    - train/current_user_reconstruction_first_token_exact_match
    - train/probe_first_token_exact_match
  # Validation metric keys, all under the `validation/` prefix.
  # validation/goal_loss is also the value of checkpoint_selection_metric.
  # NOTE(review): keys presumably must match the names emitted by the
  # validation code exactly -- confirm.
  wandb_validation_metric_keys:
    - validation/loss_total
    - validation/loss_response
    - validation/loss_current_user_reconstruction
    - validation/loss_probe
    - validation/goal_loss
    - validation/response_similarity
    - validation/response_reconstruction_similarity
    - validation/probe_transcript_similarity

  # Checkpoint selection: metric name plus direction (`min`).
  # The metric also appears in wandb_validation_metric_keys.
  checkpoint_selection_metric: validation/goal_loss
  checkpoint_selection_mode: min
  # Miss penalties; the probe similarity penalty (2.0) is twice the others.
  # NOTE(review): presumably these feed into validation/goal_loss -- the
  # formula is not visible in this file, confirm in the validation code.
  validation_response_exact_miss_penalty: 1.0
  validation_reconstruction_similarity_miss_penalty: 1.0
  validation_probe_exact_miss_penalty: 1.0
  validation_probe_similarity_miss_penalty: 2.0

# Batch sizing and schedule length for the training phase.
phase:
  # micro_batch_size x gradient_accumulation_steps = 10 x 4 = 40.
  # NOTE(review): presumably 40 examples per optimizer step per process,
  # assuming standard gradient accumulation -- confirm.
  micro_batch_size: 10
  eval_batch_size: 10
  gradient_accumulation_steps: 4
  num_train_epochs: 5
  # NOTE(review): presumably a fraction of total training steps -- confirm.
  warmup_ratio: 0.03
  shuffle_train: true

# Cache location for preprocessed data (a relative path).
# NOTE(review): the base directory it resolves against is not visible here.
cache:
  preprocessed_root: cache/preprocessed_pre_sft_multiturn_simple_transcript

# Output roots for runs and exports (relative paths).
# NOTE(review): the base directory they resolve against is not visible here.
paths:
  run_root: runs_pre_sft_multiturn_simple_transcript
  export_root: exports_multiturn_simple_transcript

# Inference-time formatting options.
inference:
  format: predictive_state_multiturn
  # The dataset entry in this file also sets use_base_chat_template: true.
  use_base_chat_template: true

# Weights & Biases logging settings.
wandb:
  enabled: true
  project: samantha-pre-sft
  # NOTE(review): run name says "t5gemma2" while model.base_model_name is
  # google/t5gemma-l-l-prefixlm-it -- confirm the name is intentional.
  run_name: t5gemma2-thoughtloop-pre-sft-simple-transcript

# Hub upload target for the resulting model.
hub:
  model_repo_id: BRlkl/test_multiturn_simple_transcript1024_2
  # NOTE(review): presumably the uploaded repo is created as public --
  # confirm this is intended before pushing.
  private: false