| { | |
| "command": "/usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py -f /root/.local/share/jupyter/runtime/kernel-3aca3dc1-4d01-4666-980c-0422f6452525.json", | |
| "compute": { | |
| "arch_list": [ | |
| "sm_50", | |
| "sm_60", | |
| "sm_70", | |
| "sm_75", | |
| "sm_80", | |
| "sm_86", | |
| "sm_90" | |
| ], | |
| "devices": { | |
| "0": { | |
| "device_capability": [ | |
| 8, | |
| 0 | |
| ], | |
| "device_properties": "_CudaDeviceProperties(name='NVIDIA A100-SXM4-40GB', major=8, minor=0, total_memory=40513MB, multi_processor_count=108)", | |
| "gpu_type": "NVIDIA A100-SXM4-40GB" | |
| } | |
| }, | |
| "gencode_flags": "-gencode compute=compute_50,code=sm_50 -gencode compute=compute_60,code=sm_60 -gencode compute=compute_70,code=sm_70 -gencode compute=compute_75,code=sm_75 -gencode compute=compute_80,code=sm_80 -gencode compute=compute_86,code=sm_86 -gencode compute=compute_90,code=sm_90", | |
| "gpus_per_node": 1, | |
| "num_nodes": 1 | |
| }, | |
| "config": { | |
| "adapter": { | |
| "alpha": 16, | |
| "bias_type": "none", | |
| "dropout": 0.05, | |
| "postprocessor": { | |
| "merge_adapter_into_base_model": false, | |
| "progressbar": false | |
| }, | |
| "pretrained_adapter_weights": null, | |
| "r": 8, | |
| "target_modules": null, | |
| "type": "lora", | |
| "use_dora": false, | |
| "use_rslora": false | |
| }, | |
| "backend": null, | |
| "base_model": "mistralai/Mistral-7B-Instruct-v0.2", | |
| "defaults": { | |
| "text": { | |
| "decoder": { | |
| "fc_activation": "relu", | |
| "fc_bias_initializer": "zeros", | |
| "fc_dropout": 0.0, | |
| "fc_layers": null, | |
| "fc_norm": null, | |
| "fc_norm_params": null, | |
| "fc_output_size": 256, | |
| "fc_use_bias": true, | |
| "fc_weights_initializer": "xavier_uniform", | |
| "input_size": null, | |
| "max_new_tokens": null, | |
| "num_fc_layers": 0, | |
| "pretrained_model_name_or_path": "", | |
| "tokenizer": "hf_tokenizer", | |
| "type": "text_extractor", | |
| "vocab_file": "" | |
| }, | |
| "encoder": { | |
| "skip": false, | |
| "type": "passthrough" | |
| }, | |
| "loss": { | |
| "class_similarities": null, | |
| "class_similarities_temperature": 0, | |
| "class_weights": null, | |
| "confidence_penalty": 0, | |
| "robust_lambda": 0, | |
| "type": "next_token_softmax_cross_entropy", | |
| "unique": false, | |
| "weight": 1.0 | |
| }, | |
| "preprocessing": { | |
| "cache_encoder_embeddings": false, | |
| "compute_idf": false, | |
| "computed_fill_value": "<UNK>", | |
| "fill_value": "<UNK>", | |
| "lowercase": false, | |
| "max_sequence_length": 256, | |
| "missing_value_strategy": "fill_with_const", | |
| "most_common": 20000, | |
| "ngram_size": 2, | |
| "padding": "right", | |
| "padding_symbol": "<PAD>", | |
| "pretrained_model_name_or_path": null, | |
| "prompt": { | |
| "retrieval": { | |
| "index_name": null, | |
| "k": 0, | |
| "model_name": null, | |
| "type": null | |
| }, | |
| "task": null, | |
| "template": null | |
| }, | |
| "sequence_length": null, | |
| "tokenizer": "space_punct", | |
| "unknown_symbol": "<UNK>", | |
| "vocab_file": null | |
| } | |
| } | |
| }, | |
| "generation": { | |
| "bad_words_ids": null, | |
| "begin_suppress_tokens": null, | |
| "bos_token_id": null, | |
| "diversity_penalty": 0.0, | |
| "do_sample": true, | |
| "early_stopping": false, | |
| "encoder_repetition_penalty": 1.0, | |
| "eos_token_id": null, | |
| "epsilon_cutoff": 0.0, | |
| "eta_cutoff": 0.0, | |
| "exponential_decay_length_penalty": null, | |
| "force_words_ids": null, | |
| "forced_bos_token_id": null, | |
| "forced_decoder_ids": null, | |
| "forced_eos_token_id": null, | |
| "guidance_scale": null, | |
| "length_penalty": 1.0, | |
| "max_length": 32, | |
| "max_new_tokens": 512, | |
| "max_time": null, | |
| "min_length": 0, | |
| "min_new_tokens": null, | |
| "no_repeat_ngram_size": 0, | |
| "num_beam_groups": 1, | |
| "num_beams": 1, | |
| "pad_token_id": null, | |
| "penalty_alpha": null, | |
| "prompt_lookup_num_tokens": null, | |
| "remove_invalid_values": false, | |
| "renormalize_logits": false, | |
| "repetition_penalty": 1.0, | |
| "sequence_bias": null, | |
| "suppress_tokens": null, | |
| "temperature": 0.1, | |
| "top_k": 50, | |
| "top_p": 1.0, | |
| "typical_p": 1.0, | |
| "use_cache": true | |
| }, | |
| "hyperopt": null, | |
| "input_features": [ | |
| { | |
| "active": true, | |
| "column": "question", | |
| "encoder": { | |
| "skip": false, | |
| "type": "passthrough" | |
| }, | |
| "name": "question", | |
| "preprocessing": { | |
| "cache_encoder_embeddings": false, | |
| "compute_idf": false, | |
| "computed_fill_value": "<UNK>", | |
| "fill_value": "<UNK>", | |
| "lowercase": false, | |
| "max_sequence_length": null, | |
| "missing_value_strategy": "fill_with_const", | |
| "most_common": 20000, | |
| "ngram_size": 2, | |
| "padding": "left", | |
| "padding_symbol": "<PAD>", | |
| "pretrained_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2", | |
| "sequence_length": null, | |
| "tokenizer": "hf_tokenizer", | |
| "unknown_symbol": "<UNK>", | |
| "vocab_file": null | |
| }, | |
| "proc_column": "question_Nlu_HO", | |
| "tied": null, | |
| "type": "text" | |
| } | |
| ], | |
| "ludwig_version": "0.10.2", | |
| "model_parameters": null, | |
| "model_type": "llm", | |
| "output_features": [ | |
| { | |
| "active": true, | |
| "class_similarities": null, | |
| "column": "record_id", | |
| "decoder": { | |
| "fc_activation": "relu", | |
| "fc_bias_initializer": "zeros", | |
| "fc_dropout": 0.0, | |
| "fc_layers": null, | |
| "fc_norm": null, | |
| "fc_norm_params": null, | |
| "fc_output_size": 256, | |
| "fc_use_bias": true, | |
| "fc_weights_initializer": "xavier_uniform", | |
| "input_size": null, | |
| "max_new_tokens": 512, | |
| "num_fc_layers": 0, | |
| "pretrained_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2", | |
| "tokenizer": "hf_tokenizer", | |
| "type": "text_extractor", | |
| "vocab_file": "" | |
| }, | |
| "default_validation_metric": "loss", | |
| "dependencies": [], | |
| "input_size": null, | |
| "loss": { | |
| "class_similarities": null, | |
| "class_similarities_temperature": 0, | |
| "class_weights": null, | |
| "confidence_penalty": 0, | |
| "robust_lambda": 0, | |
| "type": "next_token_softmax_cross_entropy", | |
| "unique": false, | |
| "weight": 1.0 | |
| }, | |
| "name": "record_id", | |
| "num_classes": null, | |
| "preprocessing": { | |
| "cache_encoder_embeddings": false, | |
| "compute_idf": false, | |
| "computed_fill_value": "<UNK>", | |
| "fill_value": "<UNK>", | |
| "lowercase": false, | |
| "max_sequence_length": null, | |
| "missing_value_strategy": "drop_row", | |
| "most_common": 20000, | |
| "ngram_size": 2, | |
| "padding": "left", | |
| "padding_symbol": "<PAD>", | |
| "pretrained_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2", | |
| "sequence_length": null, | |
| "tokenizer": "hf_tokenizer", | |
| "unknown_symbol": "<UNK>", | |
| "vocab_file": null | |
| }, | |
| "proc_column": "record_id_D_Znvc", | |
| "reduce_dependencies": "sum", | |
| "reduce_input": "sum", | |
| "type": "text" | |
| } | |
| ], | |
| "preprocessing": { | |
| "global_max_sequence_length": 512, | |
| "oversample_minority": null, | |
| "sample_ratio": 1.0, | |
| "sample_size": null, | |
| "split": { | |
| "probabilities": [ | |
| 1.0, | |
| 0.0, | |
| 0.0 | |
| ], | |
| "type": "random" | |
| }, | |
| "undersample_majority": null | |
| }, | |
| "prompt": { | |
| "retrieval": { | |
| "index_name": null, | |
| "k": 0, | |
| "model_name": null, | |
| "type": null | |
| }, | |
| "task": null, | |
| "template": "Below is an instruction that describes a task, paired with an input that provides further context with language code and country code. Write a answer that appropriately answers the question with respect to the country code and language code.if the input provided consists of combination of two questions, then provide two record id or else only provide one record id only.\n### country_code: {country_code}\n### language_code: {language_code}\n### Instruction: {question}\n### answer:" | |
| }, | |
| "quantization": { | |
| "bits": 4, | |
| "bnb_4bit_compute_dtype": "float16", | |
| "bnb_4bit_quant_type": "nf4", | |
| "bnb_4bit_use_double_quant": true, | |
| "llm_int8_has_fp16_weight": false, | |
| "llm_int8_threshold": 6.0 | |
| }, | |
| "trainer": { | |
| "base_learning_rate": 0.0, | |
| "batch_size": 1, | |
| "bucketing_field": null, | |
| "checkpoints_per_epoch": 0, | |
| "compile": false, | |
| "early_stop": 5, | |
| "effective_batch_size": "auto", | |
| "enable_gradient_checkpointing": false, | |
| "enable_profiling": false, | |
| "epochs": 25, | |
| "eval_batch_size": 2, | |
| "eval_steps": null, | |
| "evaluate_training_set": false, | |
| "gradient_accumulation_steps": 16, | |
| "gradient_clipping": { | |
| "clipglobalnorm": 0.5, | |
| "clipnorm": null, | |
| "clipvalue": null | |
| }, | |
| "increase_batch_size_eval_metric": "loss", | |
| "increase_batch_size_eval_split": "training", | |
| "increase_batch_size_on_plateau": 0, | |
| "increase_batch_size_on_plateau_patience": 5, | |
| "increase_batch_size_on_plateau_rate": 2.0, | |
| "learning_rate": 0.0004, | |
| "learning_rate_scaling": "linear", | |
| "learning_rate_scheduler": { | |
| "decay": "cosine", | |
| "decay_rate": 0.96, | |
| "decay_steps": 10000, | |
| "eta_min": 0, | |
| "reduce_eval_metric": "loss", | |
| "reduce_eval_split": "training", | |
| "reduce_on_plateau": 0, | |
| "reduce_on_plateau_patience": 10, | |
| "reduce_on_plateau_rate": 0.1, | |
| "staircase": false, | |
| "t_0": null, | |
| "t_mult": 1, | |
| "warmup_evaluations": 0, | |
| "warmup_fraction": 0.03 | |
| }, | |
| "max_batch_size": 1099511627776, | |
| "optimizer": { | |
| "amsgrad": false, | |
| "betas": [ | |
| 0.9, | |
| 0.999 | |
| ], | |
| "block_wise": true, | |
| "eps": 1e-08, | |
| "percentile_clipping": 100, | |
| "type": "paged_adam", | |
| "weight_decay": 0.0 | |
| }, | |
| "profiler": { | |
| "active": 3, | |
| "repeat": 5, | |
| "skip_first": 0, | |
| "wait": 1, | |
| "warmup": 1 | |
| }, | |
| "regularization_lambda": 0.0, | |
| "regularization_type": "l2", | |
| "should_shuffle": true, | |
| "skip_all_evaluation": false, | |
| "steps_per_checkpoint": 0, | |
| "train_steps": null, | |
| "type": "finetune", | |
| "use_mixed_precision": false, | |
| "validation_field": "record_id", | |
| "validation_metric": "loss" | |
| } | |
| }, | |
| "data_format": "<class 'pandas.core.frame.DataFrame'>", | |
| "ludwig_version": "0.10.2", | |
| "random_seed": 42, | |
| "torch_version": "2.2.1+cu121" | |
| } |