Add files using upload-large-folder tool

Browse files

Files changed (8) hide show

.gitattributes +1 -0
debug.log +264 -0
merged/chat_template.jinja +4 -0
merged/config.json +71 -0
merged/generation_config.json +7 -0
merged/model.safetensors +3 -0
merged/tokenizer.json +3 -0
merged/tokenizer_config.json +15 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+merged/tokenizer.json filter=lfs diff=lfs merge=lfs -text

debug.log ADDED Viewed

	@@ -0,0 +1,264 @@

+[2026-03-23 14:34:32,736] [DEBUG] [axolotl.utils.config.log_gpu_memory_usage:127] [PID:26511] baseline 0.000GB ()
+[2026-03-23 14:34:32,739] [INFO] [axolotl.cli.config.load_cfg:341] [PID:26511] config:
+{
+  "activation_offloading": false,
+  "adapter": "lora",
+  "axolotl_config_path": "config.yml",
+  "base_model": "Intelligent-Internet/II-Medical-8B",
+  "base_model_config": "Intelligent-Internet/II-Medical-8B",
+  "batch_size": 16,
+  "bf16": true,
+  "capabilities": {
+    "bf16": true,
+    "compute_capability": "sm_100",
+    "fp8": true,
+    "n_gpu": 1,
+    "n_node": 1,
+    "tf32": true
+  },
+  "chat_template": "tokenizer_default",
+  "context_parallel_size": 1,
+  "dataloader_num_workers": 1,
+  "dataloader_pin_memory": true,
+  "dataloader_prefetch_factor": 256,
+  "dataset_num_proc": 24,
+  "dataset_prepared_path": "last_run_prepared",
+  "datasets": [
+    {
+      "message_property_mappings": {
+        "content": "content",
+        "role": "role"
+      },
+      "path": "ruslanmv/HealthCareMagic-100k",
+      "trust_remote_code": false,
+      "type": "alpaca"
+    },
+    {
+      "message_property_mappings": {
+        "content": "content",
+        "role": "role"
+      },
+      "path": "medalpaca/medical_meadow_mediqa",
+      "trust_remote_code": false,
+      "type": "alpaca"
+    },
+    {
+      "message_property_mappings": {
+        "content": "content",
+        "role": "role"
+      },
+      "path": "medalpaca/medical_meadow_medical_flashcards",
+      "trust_remote_code": false,
+      "type": "alpaca"
+    },
+    {
+      "message_property_mappings": {
+        "content": "content",
+        "role": "role"
+      },
+      "path": "ruslanmv/icliniq-7k",
+      "trust_remote_code": false,
+      "type": {
+        "field_instruction": "input",
+        "field_output": "answer_icliniq",
+        "format": "{instruction}",
+        "no_input_format": "{instruction}",
+        "system_prompt": "You are a helpful medical assistant."
+      }
+    },
+    {
+      "message_property_mappings": {
+        "content": "content",
+        "role": "role"
+      },
+      "path": "keivalya/MedQuad-MedicalQnADataset",
+      "trust_remote_code": false,
+      "type": {
+        "field_instruction": "Question",
+        "field_output": "Answer",
+        "format": "{instruction}",
+        "no_input_format": "{instruction}",
+        "system_prompt": "You are a helpful medical assistant."
+      }
+    },
+    {
+      "message_property_mappings": {
+        "content": "content",
+        "role": "role"
+      },
+      "path": "mohammad2928git/complete_medical_symptom_dataset",
+      "trust_remote_code": false,
+      "type": {
+        "field_instruction": "text",
+        "field_output": "Name",
+        "format": "{instruction}",
+        "no_input_format": "{instruction}",
+        "system_prompt": "You are a helpful medical diagnostic assistant. Based on the patient's symptoms, identify the most likely condition."
+      }
+    },
+    {
+      "field": "page_text",
+      "message_property_mappings": {
+        "content": "content",
+        "role": "role"
+      },
+      "path": "gamino/wiki_medical_terms",
+      "trust_remote_code": false,
+      "type": "completion"
+    }
+  ],
+  "ddp": false,
+  "device": "cuda:0",
+  "device_map": "auto",
+  "dion_rank_fraction": 1.0,
+  "dion_rank_multiple_of": 1,
+  "eaft_alpha": 1.0,
+  "eaft_k": 20,
+  "env_capabilities": {
+    "torch_version": "2.9.1"
+  },
+  "eval_batch_size": 8,
+  "eval_causal_lm_metrics": [
+    "sacrebleu",
+    "comet",
+    "ter",
+    "chrf"
+  ],
+  "eval_max_new_tokens": 128,
+  "eval_sample_packing": false,
+  "eval_steps": 0.08333333333333333,
+  "eval_table_size": 0,
+  "evals_per_epoch": 4,
+  "experimental_skip_move_to_device": true,
+  "flash_attention": false,
+  "fp16": false,
+  "generate_samples": false,
+  "generation_do_sample": true,
+  "generation_max_new_tokens": 50,
+  "generation_prompt_ratio": 0.5,
+  "generation_temperature": 0.7,
+  "gradient_accumulation_steps": 1,
+  "gradient_checkpointing": true,
+  "gradient_checkpointing_kwargs": {
+    "use_reentrant": true
+  },
+  "group_by_length": false,
+  "include_tkps": true,
+  "is_falcon_derived_model": false,
+  "is_llama_derived_model": false,
+  "is_mistral_derived_model": false,
+  "learning_rate": 0.0002,
+  "lisa_layers_attribute": "model.layers",
+  "load_best_model_at_end": false,
+  "load_in_4bit": false,
+  "load_in_8bit": false,
+  "local_rank": 0,
+  "logging_steps": 1,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "lora_model_dir": "./medical-llm-out",
+  "lora_r": 32,
+  "lora_target_modules": [
+    "q_proj",
+    "v_proj",
+    "k_proj",
+    "o_proj",
+    "gate_proj",
+    "down_proj",
+    "up_proj"
+  ],
+  "loraplus_lr_embedding": 1e-06,
+  "lr_scheduler": "cosine",
+  "mean_resizing_embeddings": false,
+  "merge_lora": true,
+  "micro_batch_size": 16,
+  "model_config_type": "qwen3",
+  "num_epochs": 3.0,
+  "num_generation_samples": 3,
+  "optimizer": "paged_adamw_32bit",
+  "otel_metrics_host": "localhost",
+  "otel_metrics_port": 8000,
+  "output_dir": "./medical-llm-merged",
+  "pad_to_sequence_len": true,
+  "pretrain_multipack_attn": true,
+  "profiler_steps_start": 0,
+  "qlora_sharded_model_loading": false,
+  "quantize_moe_experts": false,
+  "ray_num_workers": 1,
+  "remove_unused_columns": false,
+  "resources_per_worker": {
+    "GPU": 1
+  },
+  "sample_packing": true,
+  "sample_packing_bin_size": 200,
+  "sample_packing_group_size": 100000,
+  "save_only_model": false,
+  "save_safetensors": true,
+  "save_steps": 0.3333333333333333,
+  "saves_per_epoch": 1,
+  "sequence_len": 4096,
+  "shuffle_before_merging_datasets": false,
+  "shuffle_merged_datasets": true,
+  "skip_prepare_dataset": false,
+  "streaming_multipack_buffer_size": 10000,
+  "strict": false,
+  "tensor_parallel_size": 1,
+  "tf32": true,
+  "tiled_mlp_use_original_mlp": true,
+  "tokenizer_config": "Intelligent-Internet/II-Medical-8B",
+  "tokenizer_save_jinja_files": true,
+  "tokenizer_type": "AutoTokenizer",
+  "torch_dtype": "torch.bfloat16",
+  "train_on_inputs": false,
+  "trl": {
+    "async_prefetch": false,
+    "log_completions": false,
+    "mask_truncated_completions": false,
+    "ref_model_mixup_alpha": 0.9,
+    "ref_model_sync_steps": 64,
+    "replay_buffer_size": 0,
+    "replay_recompute_logps": true,
+    "reroll_max_groups": 1,
+    "reroll_start_fraction": 1.0,
+    "reward_num_workers": 1,
+    "scale_rewards": true,
+    "skip_zero_advantage_batches": true,
+    "sync_ref_model": false,
+    "use_data_producer": false,
+    "use_vllm": false,
+    "vllm_lora_sync": false,
+    "vllm_server_host": "0.0.0.0",
+    "vllm_server_port": 8000
+  },
+  "type_of_model": "AutoModelForCausalLM",
+  "use_otel_metrics": false,
+  "use_ray": false,
+  "val_set_size": 0.05,
+  "vllm": {
+    "device": "auto",
+    "dtype": "auto",
+    "gpu_memory_utilization": 0.9,
+    "host": "0.0.0.0",
+    "port": 8000
+  },
+  "warmup_steps": 10,
+  "weight_decay": 0.0,
+  "world_size": 1
+}
+[2026-03-23 14:34:32,740] [INFO] [axolotl.cli.utils.load.load_model_and_tokenizer:40] [PID:26511] loading tokenizer... Intelligent-Internet/II-Medical-8B
+[2026-03-23 14:34:34,848] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:299] [PID:26511] EOS: 151645 / <|im_end|>
+[2026-03-23 14:34:34,849] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:300] [PID:26511] BOS: None / None
+[2026-03-23 14:34:34,849] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:301] [PID:26511] PAD: 151643 / <|endoftext|>
+[2026-03-23 14:34:34,849] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:302] [PID:26511] UNK: None / None
+[2026-03-23 14:34:34,849] [INFO] [axolotl.cli.utils.load.load_model_and_tokenizer:43] [PID:26511] loading model...
+[2026-03-23 14:34:35,028] [DEBUG] [axolotl.monkeypatch.transformers.trainer_loss_calc.patch_evaluation_loop:91] [PID:26511] Patched Trainer.evaluation_loop with nanmean loss calculation
+[2026-03-23 14:34:35,033] [DEBUG] [axolotl.monkeypatch.transformers.trainer_loss_calc.patch_maybe_log_save_evaluate:142] [PID:26511] Patched Trainer._maybe_log_save_evaluate with nanmean loss calculation
+[2026-03-23 14:34:35,035] [INFO] [axolotl.loaders.patch_manager._apply_multipack_patches:402] [PID:26511] Applying multipack dataloader patch for sample packing...
+[2026-03-23 14:35:06,408] [INFO] [axolotl.loaders.model._configure_embedding_dtypes:361] [PID:26511] Converting modules to torch.bfloat16
+[2026-03-23 14:35:06,416] [DEBUG] [axolotl.loaders.model.log_gpu_memory_usage:127] [PID:26511] Memory usage after model load 18.735GB (+18.735GB allocated, +19.895GB reserved)
+[2026-03-23 14:35:06,417] [DEBUG] [axolotl.loaders.adapter.load_lora:150] [PID:26511] Loading pretrained PEFT - LoRA
+trainable params: 87,293,952 || all params: 8,278,029,312 || trainable%: 1.0545
+[2026-03-23 14:35:24,275] [DEBUG] [axolotl.loaders.model.log_gpu_memory_usage:127] [PID:26511] after adapters 15.907GB (+15.907GB allocated, +20.229GB reserved)

merged/chat_template.jinja ADDED Viewed

	@@ -0,0 +1,4 @@

+{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{ '<|im_start|>' + message['role'] + '
+' + message['content'] | trim + '<|im_end|>
+' }}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant
+' }}{% endif %}

merged/config.json ADDED Viewed

	@@ -0,0 +1,71 @@

+{
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": null,
+  "dtype": "bfloat16",
+  "eos_token_id": 151645,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "pad_token_id": null,
+  "rms_norm_eps": 1e-06,
+  "rope_parameters": {
+    "rope_theta": 1000000,
+    "rope_type": "default"
+  },
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "transformers_version": "5.3.0",
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151936
+}

merged/generation_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 151643,
+  "do_sample": true,
+  "eos_token_id": 151645,
+  "transformers_version": "5.3.0"
+}

merged/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:76404fea37abc0175f836dea4788ca44c7912a414a7814d91a7a2daabde5ccd6
+size 16381517208

merged/tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506
+size 11422650

merged/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+  "add_prefix_space": false,
+  "backend": "tokenizers",
+  "bos_token": null,
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
+  "is_local": false,
+  "model_max_length": 131072,
+  "pad_token": "<|endoftext|>",
+  "padding_side": "right",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
+}