2ira
/

Byte-lingua-code

Model card Files Files and versions

Byte-lingua-code/apps/mamba/configs/debug.yaml

2ira's picture

offline_compression_graph_code

72c0672 verified 4 months ago

history blame contribute delete

1.58 kB

	# Template config, need to change dump_dir, data.root_dir and tokenizer.path
	# Evals can be activated by uncommenting its config
	# python -m lingua.stool config=apps/main/configs/debug.yaml nodes=8 account=fair_amaia_cw_codegen qos=lowest

	# dump_dir: !!CHANGETHIS!!
	name: "debug"
	steps: 1000
	probe_freq: null #10
	seed: 777
	optim:
	lr: 3e-4
	warmup: 2000
	lr_min_ratio: 0.000001
	clip: 1.0

	distributed:
	fsdp_type: full_shard
	compile: true
	model_dtype: bf16
	matmul_allow_tf32: false
	selective_activation_checkpointing: true
	tp_size: 1

	model:
	dim: 1024
	n_layers: 8
	n_heads: 16
	state_dim: 128
	conv_size: 4
	dt_bias: false
	#init_std_factor: "current_depth"
	init_args:
	dt_max: 0.01
	dt_min: 0.00001

	dt_init_floor: 1e-4

	A_init_min: 0.1
	A_init_max: 2

	data:
	root_dir: data/shuffled/llama2
	sources:
	wikipedia: 80.0
	arxiv: 20.0
	batch_size: 64
	prefetch_size: 64
	seq_len: 1024
	n_views: 2
	load_async: true
	tokenizer:
	name: sp
	path: tokenizers/tokenizer_final_32k.minus_inf_ws.model

	profiling:
	run: true

	checkpoint:
	dump:
	every: 100
	keep: 2
	eval:
	every: 100
	keep: 1

	logging:
	freq: 10

	eval:
	generator:
	max_tokens: 8192
	dtype: bf16
	temperature: 1.0
	top_p: 0.95
	harness:
	tasks:
	- hellaswag
	- piqa
	- task: nq_open
	num_fewshot: 5
	validation:
	max_steps: 100