TIM committed on
Commit ·
a1b4f6c
1
Parent(s): 7f3dd53
update read model
Browse files
- InfiMed.py +2 -2
- config.json +3 -71
InfiMed.py
CHANGED
|
@@ -195,7 +195,7 @@ class InfiMed(PreTrainedModel):
|
|
| 195 |
raise NotImplementedError(
|
| 196 |
f'{config.llm_config.architectures[0]} is not implemented.')
|
| 197 |
|
| 198 |
-
self.tokenizer = AutoTokenizer.from_pretrained(config.
|
| 199 |
self.tokenizer.add_special_tokens({"additional_special_tokens": ["<|endofchunk|>", "<s>", "<|pad|>"]})
|
| 200 |
self.tokenizer.pad_token = "<|pad|>"
|
| 201 |
self.tokenizer.bos_token = "<s>"
|
|
@@ -218,7 +218,7 @@ class InfiMed(PreTrainedModel):
|
|
| 218 |
self.img_context_token_id = 151655
|
| 219 |
|
| 220 |
self.image_processor = SiglipImageProcessor.from_pretrained(
|
| 221 |
-
config.
|
| 222 |
size={"height": 384, "width": 384},
|
| 223 |
resample=PIL.Image.Resampling.BICUBIC,
|
| 224 |
crop_size={"height": 384, "width": 384},
|
|
|
|
| 195 |
raise NotImplementedError(
|
| 196 |
f'{config.llm_config.architectures[0]} is not implemented.')
|
| 197 |
|
| 198 |
+
self.tokenizer = AutoTokenizer.from_pretrained(config._name_or_path, use_fast=True)
|
| 199 |
self.tokenizer.add_special_tokens({"additional_special_tokens": ["<|endofchunk|>", "<s>", "<|pad|>"]})
|
| 200 |
self.tokenizer.pad_token = "<|pad|>"
|
| 201 |
self.tokenizer.bos_token = "<s>"
|
|
|
|
| 218 |
self.img_context_token_id = 151655
|
| 219 |
|
| 220 |
self.image_processor = SiglipImageProcessor.from_pretrained(
|
| 221 |
+
config._name_or_path,
|
| 222 |
size={"height": 384, "width": 384},
|
| 223 |
resample=PIL.Image.Resampling.BICUBIC,
|
| 224 |
crop_size={"height": 384, "width": 384},
|
config.json
CHANGED
|
@@ -1,71 +1,3 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
],
|
| 5 |
-
"llm_config": {
|
| 6 |
-
"_name_or_path": ".",
|
| 7 |
-
"architectures": [
|
| 8 |
-
"Qwen3ForCausalLM"
|
| 9 |
-
],
|
| 10 |
-
"pad_token_id": 151670,
|
| 11 |
-
"bos_token_id": 128245,
|
| 12 |
-
"eos_token_id": 151645,
|
| 13 |
-
"attention_bias": false,
|
| 14 |
-
"attention_dropout": 0.0,
|
| 15 |
-
"head_dim": 128,
|
| 16 |
-
"hidden_act": "silu",
|
| 17 |
-
"hidden_size": 2560,
|
| 18 |
-
"initializer_range": 0.02,
|
| 19 |
-
"intermediate_size": 9728,
|
| 20 |
-
"max_position_embeddings": 40960,
|
| 21 |
-
"max_window_layers": 36,
|
| 22 |
-
"model_type": "qwen3",
|
| 23 |
-
"num_attention_heads": 32,
|
| 24 |
-
"num_hidden_layers": 36,
|
| 25 |
-
"num_key_value_heads": 8,
|
| 26 |
-
"rms_norm_eps": 1e-06,
|
| 27 |
-
"rope_scaling": null,
|
| 28 |
-
"rope_theta": 1000000,
|
| 29 |
-
"sliding_window": null,
|
| 30 |
-
"torch_dtype": "bfloat16",
|
| 31 |
-
"use_cache": true,
|
| 32 |
-
"use_sliding_window": false,
|
| 33 |
-
"vocab_size": 151936,
|
| 34 |
-
"tie_word_embeddings": false
|
| 35 |
-
},
|
| 36 |
-
"load_precision": "bf16",
|
| 37 |
-
"max_length": 32,
|
| 38 |
-
"repetition_penalty": 1.2,
|
| 39 |
-
"run_dir": "qwen3-4b-instruct-continue-training+stage-finetune+x7",
|
| 40 |
-
"seed": 7,
|
| 41 |
-
"stage": "finetune",
|
| 42 |
-
"temperature": 0.0,
|
| 43 |
-
"top_p": 0.0001,
|
| 44 |
-
"torch_dtype": "bfloat16",
|
| 45 |
-
"trackers": [
|
| 46 |
-
"jsonl",
|
| 47 |
-
"wandb"
|
| 48 |
-
],
|
| 49 |
-
"transformers_version": "4.52.4",
|
| 50 |
-
"vision_config": {
|
| 51 |
-
"_name_or_path": ".",
|
| 52 |
-
"architectures": [
|
| 53 |
-
"SiglipModel"
|
| 54 |
-
],
|
| 55 |
-
"hidden_act": "gelu",
|
| 56 |
-
"attention_dropout": 0.0,
|
| 57 |
-
"hidden_size": 1152,
|
| 58 |
-
"image_size": 384,
|
| 59 |
-
"initializer_factor": 1.0,
|
| 60 |
-
"intermediate_size": 4304,
|
| 61 |
-
"layer_norm_eps": 1e-06,
|
| 62 |
-
"model_type": "siglip_vision_model",
|
| 63 |
-
"num_attention_heads": 16,
|
| 64 |
-
"num_channels": 3,
|
| 65 |
-
"num_hidden_layers": 27,
|
| 66 |
-
"patch_size": 14,
|
| 67 |
-
"torch_dtype": "bfloat16"
|
| 68 |
-
},
|
| 69 |
-
"wandb_entity": null,
|
| 70 |
-
"wandb_project": "mmpretrain"
|
| 71 |
-
}
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:01a7c1602b680b7301e429c8e2006000308bdc2f34e79d4cad87c4ddeb0076b6
|
| 3 |
+
size 1675
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|