Nithiwat
/

deberta-v3-base_claimbuster

Text Classification

Model card Files Files and versions

deberta-v3-base_claimbuster / config.json

Nithiwat's picture

Update config.json

dc002a4 over 3 years ago

history blame contribute delete

2.34 kB

	{
	"model": {
	"names": [
	"hf_text"
	],
	"hf_text": {
	"checkpoint_name": "microsoft/deberta-v3-base",
	"gradient_checkpointing": false,
	"pooling_mode": "cls",
	"data_types": [
	"text"
	],
	"tokenizer_name": "hf_auto",
	"max_text_len": 512,
	"insert_sep": true,
	"text_segment_num": 2,
	"stochastic_chunk": false,
	"text_aug_detect_length": 10,
	"text_trivial_aug_maxscale": 0,
	"text_train_augment_types": null
	}
	},
	"data": {
	"image": {
	"missing_value_strategy": "skip"
	},
	"text": null,
	"categorical": {
	"minimum_cat_count": 100,
	"maximum_num_cat": 20,
	"convert_to_text": true
	},
	"numerical": {
	"convert_to_text": false,
	"scaler_with_mean": true,
	"scaler_with_std": true
	},
	"label": {
	"numerical_label_preprocessing": "standardscaler"
	},
	"pos_label": null,
	"mixup": {
	"turn_on": false,
	"mixup_alpha": 0.8,
	"cutmix_alpha": 1,
	"cutmix_minmax": null,
	"prob": 1,
	"switch_prob": 0.5,
	"mode": "batch",
	"turn_off_epoch": 5,
	"label_smoothing": 0.1
	}
	},
	"optimization": {
	"optim_type": "adamw",
	"learning_rate": 0.0001,
	"weight_decay": 0.001,
	"lr_choice": "layerwise_decay",
	"lr_decay": 0.9,
	"lr_schedule": "cosine_decay",
	"max_epochs": 10,
	"max_steps": -1,
	"warmup_steps": 0.1,
	"end_lr": 0,
	"lr_mult": 1,
	"patience": 10,
	"val_check_interval": 0.5,
	"gradient_clip_val": 1,
	"gradient_clip_algorithm": "norm",
	"track_grad_norm": -1,
	"log_every_n_steps": 10,
	"top_k": 3,
	"top_k_average_method": "greedy_soup",
	"efficient_finetune": null,
	"lora": {
	"r": 8,
	"alpha": 8,
	"filter": [
	"query",
	"value",
	"^q$",
	"^v$",
	"^k$",
	"^o$"
	]
	},
	"loss_function": "auto"
	},
	"env": {
	"num_gpus": -1,
	"num_nodes": 1,
	"batch_size": 128,
	"per_gpu_batch_size": 2,
	"eval_batch_size_ratio": 4,
	"per_gpu_batch_size_evaluation": null,
	"precision": 16,
	"num_workers": 2,
	"num_workers_evaluation": 2,
	"fast_dev_run": false,
	"deterministic": false,
	"auto_select_gpus": true,
	"strategy": "ddp_spawn"
	}
	}