|
|
# Run-level settings.
# NOTE(review): nesting here is reconstructed from a flattened source;
# confirm it against the schema the config loader expects.
run:
  # Relative path for this run's directory (presumably where checkpoints
  # and logs are written — confirm against the training script).
  run_dir: './runs/instruct_run_24b'
  # Seed value; presumably used for RNG initialisation — confirm.
  seed: 42
|
|
|
|
|
|
|
|
# Experiment-tracking settings (key names suggest Weights & Biases).
# NOTE(review): assumed to be a top-level section; confirm with the loader.
wandb:
  enabled: true
  project: 'sft-training'
  # entity, name and notes are explicitly null; how the consumer treats a
  # null here (e.g. falling back to a default) is not visible in this file.
  entity: null
  name: null
  tags:
    - 'sft-lora'
    - '24b-Devstral'
  notes: null
|
|
|
|
|
# Base-model loading settings.
# NOTE(review): nesting is reconstructed from a flattened source; confirm.
model:
  # A relative filesystem path rather than a hub-style id.
  repo_id: './CPT/runs/cpt_run_v1/merged_24b_cpt_lora'
  revision: null
  base_local_dir: 'base_model'
  # NOTE(review): if this is forwarded to a Transformers `from_pretrained`
  # call it permits executing code shipped with the model repository —
  # confirm the source at `repo_id` is trusted.
  trust_remote_code: true
  tokenizer_use_fast: true
  device_map: 'auto'
  torch_dtype: 'bfloat16'
  # 4-bit quantisation is switched off; the bnb_4bit_* values below are
  # presumably only read when use_4bit is true — verify in the loader.
  use_4bit: false
  bnb_4bit_quant_type: 'nf4'
  bnb_4bit_use_double_quant: false
  bnb_4bit_compute_dtype: 'bfloat16'
  # null leaves the attention implementation unspecified in this file.
  attn_implementation: null
|
|
|
|
|
# Dataset location, record field mapping and prompt formatting.
# NOTE(review): nesting is reconstructed from a flattened source; confirm it
# against the schema the config loader expects.
data:
  train_jsonl: '../sft_dataset.jsonl'
  # No separate eval file is given; eval_split_ratio is presumably used to
  # carve an eval split out of train_jsonl instead — confirm in the loader.
  eval_jsonl: null
  eval_split_ratio: 0.1
  # Names of the JSONL record fields that fill the template placeholders.
  instruction_field: 'instruction'
  input_field: 'input'
  output_field: 'output'
  format_type: 'custom'
  # NOTE(review): this prompt looks incomplete. Section-marker lines
  # (presumably lines starting with `#`, matching the `##...` markers used in
  # custom_template) seem to be missing between paragraphs, and the last rule
  # ("Start with") ends mid-sentence. Restore both from the upstream prompt
  # before training. The rules list was renumbered 1-6 because two items
  # were both numbered "3.".
  system_prompt: |
    You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.

    Explain the data flow and why each component must change:
    - Flow: [Input → Processing → Output with arrows]
    - For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"
    - Explain coupling between components

    modify::crates/path/to/file.rs::impl::ComponentName
    add::crates/another/file.rs::function::AnotherComponent
    <EOS>

    1. Use full paths: `remove::crates/folder/file.rs::Type::Name`
    2. Use `::` for nested items: `status::StructName::Type::Name`
    3. Always explain "must change because" and "without this"
    4. Types of components: function, struct, enum, impl, trait
    5. If there is extra information (e.g., enum variants), include that too.
    6. Start with

    Add webhook subscription support

    The webhook system routes events via EventClass enum. Flow: webhook → EventClass → handler → processing. The EventClass enum (crates/common_enums/src/enums.rs::EventClass) must add Subscriptions variant because it defines event routing—without this, subscription events cannot be processed. The SubscriptionStatus impl (crates/common_enums/src/transformers.rs::SubscriptionStatus) must map to EventType because it converts status to events—without this, status changes don't trigger webhooks. These are coupled: EventClass routes to handlers that use SubscriptionStatus mappings.

    crates/common_enums/src/enums.rs::EventClass
    crates/common_enums/src/transformers.rs::SubscriptionStatus
    <EOS>
  # Double-quoted so the \n escapes are parsed as real newlines; the
  # {instruction}/{input}/{output} placeholders match the *_field keys above.
  custom_template: "##INSTRUCTION\n{instruction}<|im_end|>\n##TASK\n{input}<|im_end|>\n##OUTPUT\n{output}<|im_end|>"
  max_length: 2048
  shuffle: true
  num_proc: 4
|
|
|
|
|
# Parameter-efficient fine-tuning settings (key names follow LoRA terms).
# NOTE(review): nesting is reconstructed from a flattened source; confirm.
peft:
  enabled: true
  # lora_alpha is twice r (16 / 8).
  r: 8
  lora_alpha: 16
  lora_dropout: 0.05
  bias: 'none'
  # 'auto' is a sentinel string; how it is resolved to concrete module names
  # is decided by the consuming script and is not visible here.
  target_modules: 'auto'
|
|
|
|
|
# Optimisation, checkpointing and evaluation schedule.
# NOTE(review): nesting is reconstructed from a flattened source; in
# particular, early_stopping is assumed to be a child of train and
# resume_from_checkpoint a sibling of it — confirm with the config loader.
train:
  num_train_epochs: 6

  # Batch of 1 per device with 8 accumulation steps (presumably 8 sequences
  # per optimiser step per device — confirm against the trainer).
  per_device_train_batch_size: 1
  per_device_eval_batch_size: 1
  gradient_accumulation_steps: 8

  # Written with an explicit mantissa dot: YAML 1.1 loaders such as PyYAML
  # resolve a bare `1e-4` as the *string* "1e-4", not a float.
  learning_rate: 1.0e-4
  weight_decay: 0.0
  warmup_ratio: 0.08
  lr_scheduler_type: 'cosine'

  optim: 'adamw_torch'
  max_grad_norm: 0.8
  gradient_checkpointing: true

  logging_steps: 2
  save_strategy: 'steps'
  save_steps: 500
  save_total_limit: 20

  # save_steps (500) is a whole multiple of eval_steps (100).
  evaluation_strategy: 'steps'
  eval_steps: 100
  load_best_model_at_end: true

  early_stopping:
    enabled: true
    patience: 3
    min_delta: 0.001
    metric: 'eval_loss'
    mode: 'min'

  # 'auto' is a sentinel string interpreted by the consuming script.
  resume_from_checkpoint: 'auto'
|
|
|
|
|
# Settings for merging the trained adapter into a standalone model.
# NOTE(review): nesting is reconstructed from a flattened source; confirm.
merge:
  enabled: true
  # NOTE(review): float16 here differs from model.torch_dtype (bfloat16)
  # elsewhere in this file — confirm the narrower exponent range is intended.
  merged_dtype: 'float16'
  max_shard_size: '2GB'
  output_dir: './merged_24b_instruct_lora'