Instructions to use mjf-su/ADEnReward with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use mjf-su/ADEnReward with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="mjf-su/ADEnReward")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)

# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText

processor = AutoProcessor.from_pretrained("mjf-su/ADEnReward")
model = AutoModelForImageTextToText.from_pretrained("mjf-su/ADEnReward")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use mjf-su/ADEnReward with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "mjf-su/ADEnReward"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "mjf-su/ADEnReward",
    "messages": [
      {
        "role": "user",
        "content": [
          { "type": "text", "text": "Describe this image in one sentence." },
          { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } }
        ]
      }
    ]
  }'
Use Docker
docker model run hf.co/mjf-su/ADEnReward
- SGLang
How to use mjf-su/ADEnReward with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "mjf-su/ADEnReward" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "mjf-su/ADEnReward",
    "messages": [
      {
        "role": "user",
        "content": [
          { "type": "text", "text": "Describe this image in one sentence." },
          { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } }
        ]
      }
    ]
  }'
Use Docker images
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "mjf-su/ADEnReward" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "mjf-su/ADEnReward",
    "messages": [
      {
        "role": "user",
        "content": [
          { "type": "text", "text": "Describe this image in one sentence." },
          { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } }
        ]
      }
    ]
  }'
- Docker Model Runner
How to use mjf-su/ADEnReward with Docker Model Runner:
docker model run hf.co/mjf-su/ADEnReward
Upload folder using huggingface_hub
Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
- .gitattributes +202 -0
- .ipynb_checkpoints/config-checkpoint.json +68 -0
- README.md +68 -0
- added_tokens.json +28 -0
- chat_template.jinja +110 -0
- checkpoint-100/added_tokens.json +28 -0
- checkpoint-100/chat_template.jinja +110 -0
- checkpoint-100/config.json +68 -0
- checkpoint-100/generation_config.json +12 -0
- checkpoint-100/merges.txt +0 -0
- checkpoint-100/model-00001-of-00002.safetensors +3 -0
- checkpoint-100/model-00002-of-00002.safetensors +3 -0
- checkpoint-100/model.safetensors.index.json +721 -0
- checkpoint-100/optimizer.pt +3 -0
- checkpoint-100/rng_state_0.pth +3 -0
- checkpoint-100/rng_state_1.pth +3 -0
- checkpoint-100/rng_state_2.pth +3 -0
- checkpoint-100/rng_state_3.pth +3 -0
- checkpoint-100/scheduler.pt +3 -0
- checkpoint-100/special_tokens_map.json +31 -0
- checkpoint-100/tokenizer.json +3 -0
- checkpoint-100/tokenizer_config.json +244 -0
- checkpoint-100/trainer_state.json +342 -0
- checkpoint-100/training_args.bin +3 -0
- checkpoint-100/vocab.json +0 -0
- checkpoint-200/added_tokens.json +28 -0
- checkpoint-200/chat_template.jinja +110 -0
- checkpoint-200/config.json +68 -0
- checkpoint-200/generation_config.json +12 -0
- checkpoint-200/merges.txt +0 -0
- checkpoint-200/model-00001-of-00002.safetensors +3 -0
- checkpoint-200/model-00002-of-00002.safetensors +3 -0
- checkpoint-200/model.safetensors.index.json +721 -0
- checkpoint-200/optimizer.pt +3 -0
- checkpoint-200/rng_state_0.pth +3 -0
- checkpoint-200/rng_state_1.pth +3 -0
- checkpoint-200/rng_state_2.pth +3 -0
- checkpoint-200/rng_state_3.pth +3 -0
- checkpoint-200/scheduler.pt +3 -0
- checkpoint-200/special_tokens_map.json +31 -0
- checkpoint-200/tokenizer.json +3 -0
- checkpoint-200/tokenizer_config.json +244 -0
- checkpoint-200/trainer_state.json +650 -0
- checkpoint-200/training_args.bin +3 -0
- checkpoint-200/vocab.json +0 -0
- checkpoint-292/added_tokens.json +28 -0
- checkpoint-292/chat_template.jinja +110 -0
- checkpoint-292/config.json +68 -0
- checkpoint-292/generation_config.json +12 -0
- checkpoint-292/merges.txt +0 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,205 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
checkpoint-100/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
checkpoint-200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
checkpoint-292/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
eval_image_cache/chunk_0108__07f9f468-02ac-4ec6-b83b-1755edc5cdce__1999996.png filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
eval_image_cache/chunk_0109__bbb36725-1b40-419e-9432-699b5bd0d6dc__10000051.png filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
eval_image_cache/chunk_0110__1a95715d-4ce6-47af-a95b-75fec0b326ee__6999997.png filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
eval_image_cache/chunk_0111__3935487c-e145-43a3-b54d-a220130db33d__3999988.png filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
eval_image_cache/chunk_0111__3935487c-e145-43a3-b54d-a220130db33d__5000041.png filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
eval_image_cache/chunk_0111__3935487c-e145-43a3-b54d-a220130db33d__7999972.png filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
eval_image_cache/chunk_0112__3e75f81b-748f-4ee5-aa74-3bc331df175b__2999908.png filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
eval_image_cache/chunk_0114__30a70a85-3cdf-4c02-ac15-0c92d1224b82__12000124.png filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
eval_image_cache/chunk_0114__30a70a85-3cdf-4c02-ac15-0c92d1224b82__2000001.png filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
eval_image_cache/chunk_0114__b7cf7e0e-dba6-4551-9a13-596297df1bba__8000088.png filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
eval_image_cache/chunk_0117__e785f4d9-8121-4ccb-a171-fb9c7277a316__9999629.png filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
eval_image_cache/chunk_0118__0c1a7bec-2ff6-4e5a-bf0e-2d766231ca5d__12000067.png filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
eval_image_cache/chunk_0118__1f23fc05-0eca-436d-b04e-fc6af9e10952__7999502.png filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
eval_image_cache/chunk_0120__2ad625e0-db47-4837-ac85-5c7868750021__12000141.png filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
eval_image_cache/chunk_0120__2ad625e0-db47-4837-ac85-5c7868750021__14000130.png filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
eval_image_cache/chunk_0120__2ad625e0-db47-4837-ac85-5c7868750021__9000080.png filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
eval_image_cache/chunk_0124__144495bb-8e60-4491-a587-869656f71b47__8999977.png filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
eval_image_cache/chunk_0126__2cd6a7c7-646d-4104-91f7-fd4ae19a4d51__3999994.png filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
eval_image_cache/chunk_0128__027a4871-e99c-4aae-be61-bce56cff84e6__7999983.png filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
eval_image_cache/chunk_0129__514963ec-4e0b-43c8-b620-a7187a63c997__3000003.png filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
eval_image_cache/chunk_0129__6c41ed3d-def2-4db3-95f2-325b80e97d41__5999994.png filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
eval_image_cache/chunk_0129__a6af5b85-cd62-4062-89e1-c5fd114bbbff__3999992.png filter=lfs diff=lfs merge=lfs -text
|
| 61 |
+
eval_image_cache/chunk_0129__a6af5b85-cd62-4062-89e1-c5fd114bbbff__5999990.png filter=lfs diff=lfs merge=lfs -text
|
| 62 |
+
eval_image_cache/chunk_0131__11271cef-d09e-4e46-8356-b546e35f1d22__3999966.png filter=lfs diff=lfs merge=lfs -text
|
| 63 |
+
eval_image_cache/chunk_0134__6759cd27-efbe-4835-a5f4-c0d8adf02243__11999986.png filter=lfs diff=lfs merge=lfs -text
|
| 64 |
+
eval_image_cache/chunk_0134__b03cdc34-a47b-414e-872a-bab168d55ba0__7999996.png filter=lfs diff=lfs merge=lfs -text
|
| 65 |
+
eval_image_cache/chunk_0134__b16f483e-ee72-4608-abf2-5c8239b8c46d__6000016.png filter=lfs diff=lfs merge=lfs -text
|
| 66 |
+
eval_image_cache/chunk_0135__82a0e604-bb05-41fb-8ad6-f24b3488494a__10000007.png filter=lfs diff=lfs merge=lfs -text
|
| 67 |
+
eval_image_cache/chunk_0255__384ae427-068b-4608-8ad2-274b19e21c50__10000162.png filter=lfs diff=lfs merge=lfs -text
|
| 68 |
+
eval_image_cache/chunk_0255__d964da47-9126-4a0d-81b5-447ff3d2f5ea__12000023.png filter=lfs diff=lfs merge=lfs -text
|
| 69 |
+
eval_image_cache/chunk_0256__b5fb222f-994d-46ca-ba64-d58ab0d9e552__5000038.png filter=lfs diff=lfs merge=lfs -text
|
| 70 |
+
eval_image_cache/chunk_0257__64672e20-c5cd-450f-af63-02e099a37ea0__8000105.png filter=lfs diff=lfs merge=lfs -text
|
| 71 |
+
eval_image_cache/chunk_0257__91b8ec37-00f9-4cfe-84b2-393e5ff8761f__11000090.png filter=lfs diff=lfs merge=lfs -text
|
| 72 |
+
eval_image_cache/chunk_0257__b019775a-94e7-485a-95d7-0aa1c631273a__13000080.png filter=lfs diff=lfs merge=lfs -text
|
| 73 |
+
eval_image_cache/chunk_0257__d346d1f8-a675-4323-865d-90f0a376d47d__1999924.png filter=lfs diff=lfs merge=lfs -text
|
| 74 |
+
eval_image_cache/chunk_0258__174f5269-71ad-4ff5-8c52-93fcd22bca08__12000160.png filter=lfs diff=lfs merge=lfs -text
|
| 75 |
+
eval_image_cache/chunk_0260__31db2104-7783-46c1-b2d0-76c841af9b65__11999976.png filter=lfs diff=lfs merge=lfs -text
|
| 76 |
+
eval_image_cache/chunk_0260__79262cc6-06dc-4f0f-a80a-7b85ba5d15ed__7000002.png filter=lfs diff=lfs merge=lfs -text
|
| 77 |
+
eval_image_cache/chunk_0261__fe440ea7-4a09-4467-9994-2430d735f5a9__8999977.png filter=lfs diff=lfs merge=lfs -text
|
| 78 |
+
eval_image_cache/chunk_0263__7a6fbf54-d533-428c-a182-557fef36cdcf__9999999.png filter=lfs diff=lfs merge=lfs -text
|
| 79 |
+
eval_image_cache/chunk_0265__af94a1d4-4786-4013-84d2-ae4900b6fe31__10999970.png filter=lfs diff=lfs merge=lfs -text
|
| 80 |
+
eval_image_cache/chunk_0347__174422ad-96a8-4808-9109-666a916d7db4__13000031.png filter=lfs diff=lfs merge=lfs -text
|
| 81 |
+
eval_image_cache/chunk_0347__1db9bc8f-6771-4c39-b4bd-6e752c376ce0__4999994.png filter=lfs diff=lfs merge=lfs -text
|
| 82 |
+
eval_image_cache/chunk_0347__6b7eb06a-3111-41ea-a93f-16b4b8aa38ff__6000009.png filter=lfs diff=lfs merge=lfs -text
|
| 83 |
+
eval_image_cache/chunk_0351__089aac8c-d8a8-4f17-bc5d-2ce4771a62b4__12999959.png filter=lfs diff=lfs merge=lfs -text
|
| 84 |
+
eval_image_cache/chunk_0352__80e59d8e-6543-4c24-bc11-34e50ef8dd5b__7999992.png filter=lfs diff=lfs merge=lfs -text
|
| 85 |
+
eval_image_cache/chunk_0586__a7763237-2ae2-4e9c-96b2-6664243c76ff__10000122.png filter=lfs diff=lfs merge=lfs -text
|
| 86 |
+
eval_image_cache/chunk_0587__51e2316b-30b9-4623-a177-2cd3b7999b6d__7999973.png filter=lfs diff=lfs merge=lfs -text
|
| 87 |
+
eval_image_cache/chunk_0588__472e7533-ffa0-4828-bb98-b9e679e4ff4f__11000101.png filter=lfs diff=lfs merge=lfs -text
|
| 88 |
+
eval_image_cache/chunk_0591__bcde195a-d402-4d71-90aa-84000183d3b4__5999999.png filter=lfs diff=lfs merge=lfs -text
|
| 89 |
+
eval_image_cache/chunk_0591__c2184364-65bc-47a3-9f39-c4586a1776d7__8999995.png filter=lfs diff=lfs merge=lfs -text
|
| 90 |
+
eval_image_cache/chunk_0592__f367b3f6-df4c-4084-b748-56dc21638a3f__11999996.png filter=lfs diff=lfs merge=lfs -text
|
| 91 |
+
eval_image_cache/chunk_0592__f367b3f6-df4c-4084-b748-56dc21638a3f__4999994.png filter=lfs diff=lfs merge=lfs -text
|
| 92 |
+
eval_image_cache/chunk_0593__a8425540-13aa-487c-8a4d-43df90e434f0__5999995.png filter=lfs diff=lfs merge=lfs -text
|
| 93 |
+
eval_image_cache/chunk_0595__485567af-f9fd-4ebd-aae8-67d9d684db49__3999957.png filter=lfs diff=lfs merge=lfs -text
|
| 94 |
+
eval_image_cache/chunk_0595__58828c5e-6104-4386-8aa2-3536c32894dc__6000002.png filter=lfs diff=lfs merge=lfs -text
|
| 95 |
+
eval_image_cache/chunk_0596__2b7fa081-c8ec-4083-80e9-4afb629ac707__1999991.png filter=lfs diff=lfs merge=lfs -text
|
| 96 |
+
eval_image_cache/chunk_0597__095856fb-112d-4d1c-bb53-5250a0f5cb76__11000014.png filter=lfs diff=lfs merge=lfs -text
|
| 97 |
+
eval_image_cache/chunk_0597__ee260900-1c26-4841-821e-435aeac9cfd3__8999979.png filter=lfs diff=lfs merge=lfs -text
|
| 98 |
+
eval_image_cache/chunk_0600__c0dfaa0c-58fb-4f6e-a806-3cced7170abc__8999956.png filter=lfs diff=lfs merge=lfs -text
|
| 99 |
+
eval_image_cache/chunk_0601__64d0502c-8a63-4e20-93e7-90cd5aa700d8__8000021.png filter=lfs diff=lfs merge=lfs -text
|
| 100 |
+
eval_image_cache/chunk_0601__b4754eda-96b8-4680-b980-c4dcaba7b43e__12000083.png filter=lfs diff=lfs merge=lfs -text
|
| 101 |
+
eval_image_cache/chunk_0602__5008d1f9-2f19-429c-8fc8-6c8652312504__8999996.png filter=lfs diff=lfs merge=lfs -text
|
| 102 |
+
eval_image_cache/chunk_0604__69fc21e8-6faf-487b-806f-1d24654805ce__4999984.png filter=lfs diff=lfs merge=lfs -text
|
| 103 |
+
eval_image_cache/chunk_0604__c97f7dda-b4d5-4db3-887b-03c489fd2650__8999995.png filter=lfs diff=lfs merge=lfs -text
|
| 104 |
+
eval_image_cache/chunk_0606__e3bf1780-f4eb-4f7f-99a6-e60f470ba734__2000007.png filter=lfs diff=lfs merge=lfs -text
|
| 105 |
+
eval_image_cache/chunk_0606__e3bf1780-f4eb-4f7f-99a6-e60f470ba734__9000140.png filter=lfs diff=lfs merge=lfs -text
|
| 106 |
+
eval_image_cache/chunk_0608__45731110-2cf7-46f3-bfc8-ca5f19c93537__6999993.png filter=lfs diff=lfs merge=lfs -text
|
| 107 |
+
eval_image_cache/chunk_0608__82371c52-7bd3-490d-93c4-f92974349435__9000012.png filter=lfs diff=lfs merge=lfs -text
|
| 108 |
+
eval_image_cache/chunk_0609__917f7182-f87f-428b-90ea-7c02153a2aa5__11000012.png filter=lfs diff=lfs merge=lfs -text
|
| 109 |
+
eval_image_cache/chunk_0618__99c62f9c-fbd5-4a06-ab6c-ad966e18d6c5__13999985.png filter=lfs diff=lfs merge=lfs -text
|
| 110 |
+
eval_image_cache/chunk_0618__b8516847-a9d3-45d6-81af-ac66942d9852__8999980.png filter=lfs diff=lfs merge=lfs -text
|
| 111 |
+
eval_image_cache/chunk_0618__fb343576-5115-472e-9f6b-37444e63b893__2000002.png filter=lfs diff=lfs merge=lfs -text
|
| 112 |
+
eval_image_cache/chunk_0621__459f0a6b-316e-4431-aefb-94277bb77a66__5000068.png filter=lfs diff=lfs merge=lfs -text
|
| 113 |
+
eval_image_cache/chunk_0621__8729f8f9-2f76-472f-99b1-ca9cffb37bb7__11999994.png filter=lfs diff=lfs merge=lfs -text
|
| 114 |
+
eval_image_cache/chunk_0622__178e7df8-9f8b-49a6-9cb4-35c26ebf0d78__2999989.png filter=lfs diff=lfs merge=lfs -text
|
| 115 |
+
eval_image_cache/chunk_0623__b2604a77-65f2-4dd4-b753-8062da42a174__12000129.png filter=lfs diff=lfs merge=lfs -text
|
| 116 |
+
eval_image_cache/chunk_0623__b2604a77-65f2-4dd4-b753-8062da42a174__8000113.png filter=lfs diff=lfs merge=lfs -text
|
| 117 |
+
eval_image_cache/chunk_0623__eddf79af-0642-45e7-838e-0235b1789628__7999973.png filter=lfs diff=lfs merge=lfs -text
|
| 118 |
+
eval_image_cache/chunk_0624__f8dfa81e-7c03-43a5-8380-291d8236958a__8999997.png filter=lfs diff=lfs merge=lfs -text
|
| 119 |
+
eval_image_cache/chunk_0625__03e18e8c-c261-4d2e-88a8-c04d94f53d0e__4000038.png filter=lfs diff=lfs merge=lfs -text
|
| 120 |
+
eval_image_cache/chunk_0625__03e18e8c-c261-4d2e-88a8-c04d94f53d0e__9000015.png filter=lfs diff=lfs merge=lfs -text
|
| 121 |
+
eval_image_cache/chunk_0627__184c1a92-7b7f-43b0-bca5-2282a7b757f6__12000012.png filter=lfs diff=lfs merge=lfs -text
|
| 122 |
+
eval_image_cache/chunk_0630__366299fa-20ac-4833-bce3-b8bc4e4d582f__2999985.png filter=lfs diff=lfs merge=lfs -text
|
| 123 |
+
eval_image_cache/chunk_0630__ac7fdf05-cfdb-4a6f-af3c-309ccef3b4f5__10999967.png filter=lfs diff=lfs merge=lfs -text
|
| 124 |
+
eval_image_cache/chunk_0630__d2a93ea6-012c-4b4e-a6f6-0ec5b419d28a__2999978.png filter=lfs diff=lfs merge=lfs -text
|
| 125 |
+
eval_image_cache/chunk_0632__438c37a2-6b39-400d-97de-06b5d5ae2995__6000003.png filter=lfs diff=lfs merge=lfs -text
|
| 126 |
+
eval_image_cache/chunk_0633__dfbbf996-f5d2-40e2-89f6-86566020d958__3999999.png filter=lfs diff=lfs merge=lfs -text
|
| 127 |
+
eval_image_cache/chunk_0635__9ee5b39a-a131-4160-9bf4-6439968d43a9__7000011.png filter=lfs diff=lfs merge=lfs -text
|
| 128 |
+
eval_image_cache/chunk_0635__e3153da1-65be-4eba-aa0a-ed28eb39493d__2000039.png filter=lfs diff=lfs merge=lfs -text
|
| 129 |
+
eval_image_cache/chunk_0637__98a1a789-378d-4cd1-964c-10e862268bd4__12000009.png filter=lfs diff=lfs merge=lfs -text
|
| 130 |
+
eval_image_cache/chunk_0638__25df96c1-62d4-46c6-b161-2ccef138945f__13999990.png filter=lfs diff=lfs merge=lfs -text
|
| 131 |
+
eval_image_cache/chunk_0640__0c210498-a149-4f4b-b372-e27227d2d5a1__7999994.png filter=lfs diff=lfs merge=lfs -text
|
| 132 |
+
eval_image_cache/chunk_0640__3725a28e-aefd-4efc-8507-8a23707e0aa7__13000039.png filter=lfs diff=lfs merge=lfs -text
|
| 133 |
+
eval_image_cache/chunk_0642__42c04a80-f888-4ae3-8dab-2e144a5ae305__13000023.png filter=lfs diff=lfs merge=lfs -text
|
| 134 |
+
eval_image_cache/chunk_0642__afe297e4-e866-46e9-804b-1bf3c41a2ff0__12999985.png filter=lfs diff=lfs merge=lfs -text
|
| 135 |
+
eval_image_cache/chunk_0642__be26ec39-4f58-478c-80a8-b9a709f80881__13000024.png filter=lfs diff=lfs merge=lfs -text
|
| 136 |
+
eval_image_cache/chunk_0643__cd62fa76-6efe-49d8-99a0-379e2782a6ad__2000016.png filter=lfs diff=lfs merge=lfs -text
|
| 137 |
+
eval_image_cache/chunk_0643__d25f9074-ee0b-4424-a4f5-dff6d6fc8890__7000001.png filter=lfs diff=lfs merge=lfs -text
|
| 138 |
+
eval_image_cache/chunk_0644__686912e3-8629-4589-a1ae-729d4714eea3__9999986.png filter=lfs diff=lfs merge=lfs -text
|
| 139 |
+
eval_image_cache/chunk_0648__0341e45a-e60a-4831-9976-23e113c0839c__9999972.png filter=lfs diff=lfs merge=lfs -text
|
| 140 |
+
eval_image_cache/chunk_0648__cfe53f5b-f0f1-4c70-b0d3-b722473f1d72__3000011.png filter=lfs diff=lfs merge=lfs -text
|
| 141 |
+
eval_image_cache/chunk_0649__73761faa-8f5e-4f41-b1f1-978b35429983__12000014.png filter=lfs diff=lfs merge=lfs -text
|
| 142 |
+
eval_image_cache/chunk_0652__2ef367e1-448e-45d1-8958-f94a7f3765dc__6000000.png filter=lfs diff=lfs merge=lfs -text
|
| 143 |
+
eval_image_cache/chunk_0652__c7a18f3e-e919-4c3c-99d2-adf63e396339__2999994.png filter=lfs diff=lfs merge=lfs -text
|
| 144 |
+
eval_image_cache/chunk_0652__d2909b0b-dac0-48dd-9cd1-4976b41a0ffd__10000029.png filter=lfs diff=lfs merge=lfs -text
|
| 145 |
+
eval_image_cache/chunk_0653__16a0ea82-dcea-4802-9f36-d335e4d47a93__10999994.png filter=lfs diff=lfs merge=lfs -text
|
| 146 |
+
eval_image_cache/chunk_0654__184727ec-0fc0-44df-8ba6-c821f2651363__13999997.png filter=lfs diff=lfs merge=lfs -text
|
| 147 |
+
eval_image_cache/chunk_0654__184727ec-0fc0-44df-8ba6-c821f2651363__4000010.png filter=lfs diff=lfs merge=lfs -text
|
| 148 |
+
eval_image_cache/chunk_0655__bd31d697-1c77-41ab-b740-dc6d46b55139__8999992.png filter=lfs diff=lfs merge=lfs -text
|
| 149 |
+
eval_image_cache/chunk_0655__d7e22e4f-9937-47cf-a3e2-cba399f9e436__5000021.png filter=lfs diff=lfs merge=lfs -text
|
| 150 |
+
eval_image_cache/chunk_0656__e62ae5fa-fcc2-4e43-bf74-81f6efe95fea__9000015.png filter=lfs diff=lfs merge=lfs -text
|
| 151 |
+
eval_image_cache/chunk_0657__2136a539-0e22-4382-8406-b9c646344aea__10999999.png filter=lfs diff=lfs merge=lfs -text
|
| 152 |
+
eval_image_cache/chunk_0657__4de6cb20-de4e-4006-8a75-e29153ac5abf__4999959.png filter=lfs diff=lfs merge=lfs -text
|
| 153 |
+
eval_image_cache/chunk_0661__7602813e-ff78-422b-b999-099cf222835c__9000026.png filter=lfs diff=lfs merge=lfs -text
|
| 154 |
+
eval_image_cache/chunk_0664__c0f0a4d4-98a7-411a-8697-765f3bf8a572__5000006.png filter=lfs diff=lfs merge=lfs -text
|
| 155 |
+
eval_image_cache/chunk_0664__e570c983-50f7-4544-85da-52b5176592b8__9000009.png filter=lfs diff=lfs merge=lfs -text
|
| 156 |
+
eval_image_cache/chunk_0666__10eddfef-8346-48cd-8f59-cab3df04f950__13999779.png filter=lfs diff=lfs merge=lfs -text
|
| 157 |
+
eval_image_cache/chunk_0666__4596da55-0671-46d4-b843-2c8299fd87aa__4000016.png filter=lfs diff=lfs merge=lfs -text
|
| 158 |
+
eval_image_cache/chunk_0668__38c69311-d520-41e3-befe-98ef3932f6ef__6999988.png filter=lfs diff=lfs merge=lfs -text
|
| 159 |
+
eval_image_cache/chunk_0668__38c69311-d520-41e3-befe-98ef3932f6ef__8999991.png filter=lfs diff=lfs merge=lfs -text
|
| 160 |
+
eval_image_cache/chunk_0671__a09199fa-a1d4-4eb0-a950-4ab55b4196a6__3999998.png filter=lfs diff=lfs merge=lfs -text
|
| 161 |
+
eval_image_cache/chunk_1057__43ad0fe8-9f11-4c34-bb2f-607fc5452c0a__7000007.png filter=lfs diff=lfs merge=lfs -text
|
| 162 |
+
eval_image_cache/chunk_1057__c99feaf4-1f8c-4fd7-b89e-06d63a935996__13000005.png filter=lfs diff=lfs merge=lfs -text
|
| 163 |
+
eval_image_cache/chunk_1059__06a68a48-2bfb-4673-a9d3-039fd62ff13d__2999986.png filter=lfs diff=lfs merge=lfs -text
|
| 164 |
+
eval_image_cache/chunk_1059__cb060b90-369f-40ce-85da-11f94dcb0b59__3999992.png filter=lfs diff=lfs merge=lfs -text
|
| 165 |
+
eval_image_cache/chunk_1059__f53afcd9-5f33-4b98-a40a-7e14ba667969__10999988.png filter=lfs diff=lfs merge=lfs -text
|
| 166 |
+
eval_image_cache/chunk_1063__7fc0c615-6e87-4efc-8000-98a59904c2c7__10999983.png filter=lfs diff=lfs merge=lfs -text
|
| 167 |
+
eval_image_cache/chunk_1064__00f79de8-de3f-4153-bc0d-490e0d8633a9__9999979.png filter=lfs diff=lfs merge=lfs -text
|
| 168 |
+
eval_image_cache/chunk_1064__952a3c9b-54e8-427a-87b8-486d8b76e4bd__10999984.png filter=lfs diff=lfs merge=lfs -text
|
| 169 |
+
eval_image_cache/chunk_1066__1af5e4de-606c-4759-b516-f383f1962f48__6000012.png filter=lfs diff=lfs merge=lfs -text
|
| 170 |
+
eval_image_cache/chunk_1066__76a36af4-0486-4ca5-b4a6-4dca980f627b__7999987.png filter=lfs diff=lfs merge=lfs -text
|
| 171 |
+
eval_image_cache/chunk_1066__9ece6473-5520-4b5f-9a15-13ac9576e005__7999945.png filter=lfs diff=lfs merge=lfs -text
|
| 172 |
+
eval_image_cache/chunk_1066__e3efea50-1cda-45ce-a23a-1fd7e21b96f7__13999996.png filter=lfs diff=lfs merge=lfs -text
|
| 173 |
+
eval_image_cache/chunk_1067__2e72746e-12f3-4d1c-acae-0fa677567b48__8999988.png filter=lfs diff=lfs merge=lfs -text
|
| 174 |
+
eval_image_cache/chunk_1067__e6838c21-2c66-4569-97eb-374be6418db2__4999981.png filter=lfs diff=lfs merge=lfs -text
|
| 175 |
+
eval_image_cache/chunk_1070__38fd4fd1-16d0-445d-bb26-850f3c395ae0__11000005.png filter=lfs diff=lfs merge=lfs -text
|
| 176 |
+
eval_image_cache/chunk_1070__38fd4fd1-16d0-445d-bb26-850f3c395ae0__7999992.png filter=lfs diff=lfs merge=lfs -text
|
| 177 |
+
eval_image_cache/chunk_1070__6ad73485-5c49-4b7d-8d77-f5da88d21b9c__6999990.png filter=lfs diff=lfs merge=lfs -text
|
| 178 |
+
eval_image_cache/chunk_1071__79a7d106-aaec-4b13-ae27-62db2abf274f__7999989.png filter=lfs diff=lfs merge=lfs -text
|
| 179 |
+
eval_image_cache/chunk_1071__f96e2317-e91f-4d02-8c56-e4d53d67dc8e__6999988.png filter=lfs diff=lfs merge=lfs -text
|
| 180 |
+
eval_image_cache/chunk_1073__63f4b7d2-ac47-4c8a-8e63-bcfaa4fb0c0e__9999978.png filter=lfs diff=lfs merge=lfs -text
|
| 181 |
+
eval_image_cache/chunk_1076__423b597a-90cf-4c16-984f-7c245aceed8e__2999998.png filter=lfs diff=lfs merge=lfs -text
|
| 182 |
+
eval_image_cache/chunk_1079__fce10c54-03a8-4b3f-964d-52060bd354ee__5999999.png filter=lfs diff=lfs merge=lfs -text
|
| 183 |
+
eval_image_cache/chunk_1080__e7f947ae-8c90-49a5-a5e3-04e32476a3ea__5999971.png filter=lfs diff=lfs merge=lfs -text
|
| 184 |
+
eval_image_cache/chunk_1081__c0e4f144-eef3-4be4-855d-672062369aa9__7000013.png filter=lfs diff=lfs merge=lfs -text
|
| 185 |
+
eval_image_cache/chunk_1082__f9eb98a0-5d14-44f1-ac97-f585ac2a39b3__13000028.png filter=lfs diff=lfs merge=lfs -text
|
| 186 |
+
eval_image_cache/chunk_1085__7ca38136-485f-44e9-804f-5f3936350402__9999988.png filter=lfs diff=lfs merge=lfs -text
|
| 187 |
+
eval_image_cache/chunk_1085__dd6c9afd-7fdb-4499-b724-122cbf402eb8__10999985.png filter=lfs diff=lfs merge=lfs -text
|
| 188 |
+
eval_image_cache/chunk_1086__4e8cbffd-f20d-43ee-af9e-14abb0c172ad__10000000.png filter=lfs diff=lfs merge=lfs -text
|
| 189 |
+
eval_image_cache/chunk_1087__8268bbad-94e1-4dd7-ad8b-f0023c4ad558__9999991.png filter=lfs diff=lfs merge=lfs -text
|
| 190 |
+
eval_image_cache/chunk_1089__d78d5e65-f00e-4dc4-8365-8ce810ac06dc__6999973.png filter=lfs diff=lfs merge=lfs -text
|
| 191 |
+
eval_image_cache/chunk_1091__166b1e4e-1c73-4835-b0be-c7781a103c86__11000003.png filter=lfs diff=lfs merge=lfs -text
|
| 192 |
+
eval_image_cache/chunk_1091__ccb5acd0-778a-4fc5-92ce-3ea86e31caf1__13999987.png filter=lfs diff=lfs merge=lfs -text
|
| 193 |
+
eval_image_cache/chunk_1091__fd994f87-3c39-4ff4-a1f8-0f1e5f4c6575__2000018.png filter=lfs diff=lfs merge=lfs -text
|
| 194 |
+
eval_image_cache/chunk_1092__265e9592-5b24-4f96-9b6f-38ce95a75d1d__12999968.png filter=lfs diff=lfs merge=lfs -text
|
| 195 |
+
eval_image_cache/chunk_1092__265e9592-5b24-4f96-9b6f-38ce95a75d1d__3999990.png filter=lfs diff=lfs merge=lfs -text
|
| 196 |
+
eval_image_cache/chunk_1094__49225bce-c30c-48b3-82a9-82b50b682997__8999994.png filter=lfs diff=lfs merge=lfs -text
|
| 197 |
+
eval_image_cache/chunk_1094__bb9354fd-394d-45a4-b980-00a3b2bf12d1__7999777.png filter=lfs diff=lfs merge=lfs -text
|
| 198 |
+
eval_image_cache/chunk_1095__130ffa81-5524-4ea6-a434-50a1e100c7b4__12999963.png filter=lfs diff=lfs merge=lfs -text
|
| 199 |
+
eval_image_cache/chunk_1095__130ffa81-5524-4ea6-a434-50a1e100c7b4__9999976.png filter=lfs diff=lfs merge=lfs -text
|
| 200 |
+
eval_image_cache/chunk_1098__d38a8e3a-175e-4042-a609-467459cb0a76__11000013.png filter=lfs diff=lfs merge=lfs -text
|
| 201 |
+
eval_image_cache/chunk_1102__a8eb19ea-88eb-43a3-a916-4d500dc205e7__13000013.png filter=lfs diff=lfs merge=lfs -text
|
| 202 |
+
eval_image_cache/chunk_1103__583582a6-c323-4bfb-b461-53f175805dc3__13999981.png filter=lfs diff=lfs merge=lfs -text
|
| 203 |
+
eval_image_cache/chunk_1103__f3dd9de0-ddfe-4052-9fc5-862f549ee9ab__14000347.png filter=lfs diff=lfs merge=lfs -text
|
| 204 |
+
eval_image_cache/chunk_2771__59f384c5-8a39-47ca-a922-13a1f348ea4c__12000038.png filter=lfs diff=lfs merge=lfs -text
|
| 205 |
+
eval_image_cache/chunk_2772__7b468eaf-05aa-4a34-820f-74a82e94d24a__12999975.png filter=lfs diff=lfs merge=lfs -text
|
| 206 |
+
eval_image_cache/chunk_2773__7de88e93-719b-4afd-a647-6477bb12932d__8999987.png filter=lfs diff=lfs merge=lfs -text
|
| 207 |
+
eval_image_cache/chunk_2774__4c4403a0-bca1-4dbb-8862-9feb046f9cb8__3000011.png filter=lfs diff=lfs merge=lfs -text
|
| 208 |
+
eval_image_cache/chunk_2776__35582a89-49cf-4ed7-8c7f-518f58eda4c9__13000006.png filter=lfs diff=lfs merge=lfs -text
|
| 209 |
+
eval_image_cache/chunk_2777__52a1e79f-39bf-4ffd-bec7-d1bd9e6ea667__10999989.png filter=lfs diff=lfs merge=lfs -text
|
| 210 |
+
eval_image_cache/chunk_2777__8847117c-066a-476c-889b-3940d6ca178b__8999975.png filter=lfs diff=lfs merge=lfs -text
|
| 211 |
+
eval_image_cache/chunk_2778__66e38562-7434-42c8-b97c-210afbf97016__9000010.png filter=lfs diff=lfs merge=lfs -text
|
| 212 |
+
eval_image_cache/chunk_2779__44df6d40-5f86-4ecc-8432-265e2beeac7e__10999850.png filter=lfs diff=lfs merge=lfs -text
|
| 213 |
+
eval_image_cache/chunk_2779__81967a46-b230-45f6-a56f-730a70ddf7b5__10000000.png filter=lfs diff=lfs merge=lfs -text
|
| 214 |
+
eval_image_cache/chunk_2780__d573ca03-a457-446a-8243-433a28d910d0__9000008.png filter=lfs diff=lfs merge=lfs -text
|
| 215 |
+
eval_image_cache/chunk_2794__e8f8ed0b-abd0-40fc-9889-512f9eebfa2c__8000013.png filter=lfs diff=lfs merge=lfs -text
|
| 216 |
+
eval_image_cache/chunk_2795__07233c4b-9fab-481c-847c-4867513586c0__9999997.png filter=lfs diff=lfs merge=lfs -text
|
| 217 |
+
eval_image_cache/chunk_2796__eb87b038-404e-4f00-b638-98ff7b40ac00__2000001.png filter=lfs diff=lfs merge=lfs -text
|
| 218 |
+
eval_image_cache/chunk_2797__365c37a2-94cd-40de-b94b-1674e0ef408c__13000090.png filter=lfs diff=lfs merge=lfs -text
|
| 219 |
+
eval_image_cache/chunk_2798__39bd6901-3a99-47dc-992e-e364346247a0__1999997.png filter=lfs diff=lfs merge=lfs -text
|
| 220 |
+
eval_image_cache/chunk_2799__29c4cd40-d260-488e-9d1d-f8a3cf952c2a__11000008.png filter=lfs diff=lfs merge=lfs -text
|
| 221 |
+
eval_image_cache/chunk_2799__29c4cd40-d260-488e-9d1d-f8a3cf952c2a__7000012.png filter=lfs diff=lfs merge=lfs -text
|
| 222 |
+
eval_image_cache/chunk_3020__203421ee-e75c-41c4-9dbe-34f2f7adf79c__10999997.png filter=lfs diff=lfs merge=lfs -text
|
| 223 |
+
eval_image_cache/chunk_3020__56f16fff-6c63-4511-a971-ca043d18a4aa__10999988.png filter=lfs diff=lfs merge=lfs -text
|
| 224 |
+
eval_image_cache/chunk_3020__b4239ff9-aaac-4f0e-b307-fdbe53a94d11__6000006.png filter=lfs diff=lfs merge=lfs -text
|
| 225 |
+
eval_image_cache/chunk_3021__a54f8f11-fab1-4add-94e6-4236ee63cee9__12000006.png filter=lfs diff=lfs merge=lfs -text
|
| 226 |
+
eval_image_cache/chunk_3021__aea860ff-6b70-456d-beba-d0a83ecd0bd9__4999991.png filter=lfs diff=lfs merge=lfs -text
|
| 227 |
+
eval_image_cache/chunk_3022__1df7dea7-061b-4edf-8fcd-48296dbb2287__12999975.png filter=lfs diff=lfs merge=lfs -text
|
| 228 |
+
eval_image_cache/chunk_3080__62fe5e69-b0f8-446c-bd4b-1e5b19f1981e__3999996.png filter=lfs diff=lfs merge=lfs -text
|
| 229 |
+
eval_image_cache/chunk_3080__d6b6653b-77cb-4bf2-a751-f82be313ff56__9999978.png filter=lfs diff=lfs merge=lfs -text
|
| 230 |
+
eval_image_cache/chunk_3081__4024607b-3590-4316-8c55-8e644c70b3a7__5999998.png filter=lfs diff=lfs merge=lfs -text
|
| 231 |
+
eval_image_cache/chunk_3081__44f595a6-e638-456c-92ca-fd8b7e2a631b__8999936.png filter=lfs diff=lfs merge=lfs -text
|
| 232 |
+
eval_image_cache/chunk_3082__29b1de86-ca2e-47e5-9f4b-846892485ec4__6000114.png filter=lfs diff=lfs merge=lfs -text
|
| 233 |
+
eval_image_cache/chunk_3083__e0fd41f6-1e4b-4a1e-a1ea-7ac0d0b3e1a0__14000108.png filter=lfs diff=lfs merge=lfs -text
|
| 234 |
+
eval_image_cache/chunk_3084__76712ddc-0849-4345-89ce-2354c1e26612__12000056.png filter=lfs diff=lfs merge=lfs -text
|
| 235 |
+
eval_image_cache/chunk_3084__9f5ff8ae-5e43-4116-94e1-202906ee17bd__8001827.png filter=lfs diff=lfs merge=lfs -text
|
| 236 |
+
eval_image_cache/chunk_3087__c44bb8ad-a913-494d-88a1-987b6e8bfdf3__8000142.png filter=lfs diff=lfs merge=lfs -text
|
| 237 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
.ipynb_checkpoints/config-checkpoint.json
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"Qwen3VLForConditionalGeneration"
|
| 4 |
+
],
|
| 5 |
+
"dtype": "bfloat16",
|
| 6 |
+
"eos_token_id": 151645,
|
| 7 |
+
"image_token_id": 151655,
|
| 8 |
+
"model_type": "qwen3_vl",
|
| 9 |
+
"pad_token_id": 151643,
|
| 10 |
+
"text_config": {
|
| 11 |
+
"attention_bias": false,
|
| 12 |
+
"attention_dropout": 0.0,
|
| 13 |
+
"bos_token_id": 151643,
|
| 14 |
+
"dtype": "bfloat16",
|
| 15 |
+
"eos_token_id": 151645,
|
| 16 |
+
"head_dim": 128,
|
| 17 |
+
"hidden_act": "silu",
|
| 18 |
+
"hidden_size": 2560,
|
| 19 |
+
"initializer_range": 0.02,
|
| 20 |
+
"intermediate_size": 9728,
|
| 21 |
+
"max_position_embeddings": 262144,
|
| 22 |
+
"model_type": "qwen3_vl_text",
|
| 23 |
+
"num_attention_heads": 32,
|
| 24 |
+
"num_hidden_layers": 36,
|
| 25 |
+
"num_key_value_heads": 8,
|
| 26 |
+
"rms_norm_eps": 1e-06,
|
| 27 |
+
"rope_scaling": {
|
| 28 |
+
"mrope_interleaved": true,
|
| 29 |
+
"mrope_section": [
|
| 30 |
+
24,
|
| 31 |
+
20,
|
| 32 |
+
20
|
| 33 |
+
],
|
| 34 |
+
"rope_type": "default"
|
| 35 |
+
},
|
| 36 |
+
"rope_theta": 5000000,
|
| 37 |
+
"tie_word_embeddings": true,
|
| 38 |
+
"use_cache": true,
|
| 39 |
+
"vocab_size": 151936
|
| 40 |
+
},
|
| 41 |
+
"tie_word_embeddings": true,
|
| 42 |
+
"transformers_version": "4.57.6",
|
| 43 |
+
"use_cache": false,
|
| 44 |
+
"video_token_id": 151656,
|
| 45 |
+
"vision_config": {
|
| 46 |
+
"deepstack_visual_indexes": [
|
| 47 |
+
5,
|
| 48 |
+
11,
|
| 49 |
+
17
|
| 50 |
+
],
|
| 51 |
+
"depth": 24,
|
| 52 |
+
"dtype": "bfloat16",
|
| 53 |
+
"hidden_act": "gelu_pytorch_tanh",
|
| 54 |
+
"hidden_size": 1024,
|
| 55 |
+
"in_channels": 3,
|
| 56 |
+
"initializer_range": 0.02,
|
| 57 |
+
"intermediate_size": 4096,
|
| 58 |
+
"model_type": "qwen3_vl",
|
| 59 |
+
"num_heads": 16,
|
| 60 |
+
"num_position_embeddings": 2304,
|
| 61 |
+
"out_hidden_size": 2560,
|
| 62 |
+
"patch_size": 16,
|
| 63 |
+
"spatial_merge_size": 2,
|
| 64 |
+
"temporal_patch_size": 2
|
| 65 |
+
},
|
| 66 |
+
"vision_end_token_id": 151653,
|
| 67 |
+
"vision_start_token_id": 151652
|
| 68 |
+
}
|
README.md
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
base_model: mjf-su/PhysicalAI-reason-VLA-MetaAction-1e
|
| 3 |
+
library_name: transformers
|
| 4 |
+
model_name: ADEnReward
|
| 5 |
+
tags:
|
| 6 |
+
- generated_from_trainer
|
| 7 |
+
- trl
|
| 8 |
+
- grpo
|
| 9 |
+
licence: license
|
| 10 |
+
---
|
| 11 |
+
|
| 12 |
+
# Model Card for ADEnReward
|
| 13 |
+
|
| 14 |
+
This model is a fine-tuned version of [mjf-su/PhysicalAI-reason-VLA-MetaAction-1e](https://huggingface.co/mjf-su/PhysicalAI-reason-VLA-MetaAction-1e).
|
| 15 |
+
It has been trained using [TRL](https://github.com/huggingface/trl).
|
| 16 |
+
|
| 17 |
+
## Quick start
|
| 18 |
+
|
| 19 |
+
```python
|
| 20 |
+
from transformers import pipeline
|
| 21 |
+
|
| 22 |
+
question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
|
| 23 |
+
generator = pipeline("text-generation", model="mjf-su/ADEnReward", device="cuda")
|
| 24 |
+
output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
|
| 25 |
+
print(output["generated_text"])
|
| 26 |
+
```
|
| 27 |
+
|
| 28 |
+
## Training procedure
|
| 29 |
+
|
| 30 |
+
[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/mjf-su-stanford-university/GRPO-faithfulness/runs/hehnoca8)
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
|
| 34 |
+
|
| 35 |
+
### Framework versions
|
| 36 |
+
|
| 37 |
+
- TRL: 0.26.1
|
| 38 |
+
- Transformers: 4.57.6
|
| 39 |
+
- PyTorch: 2.10.0
|
| 40 |
+
- Datasets: 4.4.1
|
| 41 |
+
- Tokenizers: 0.22.1
|
| 42 |
+
|
| 43 |
+
## Citations
|
| 44 |
+
|
| 45 |
+
Cite GRPO as:
|
| 46 |
+
|
| 47 |
+
```bibtex
|
| 48 |
+
@article{shao2024deepseekmath,
|
| 49 |
+
title = {{DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models}},
|
| 50 |
+
author = {Zhihong Shao and Peiyi Wang and Qihao Zhu and Runxin Xu and Junxiao Song and Mingchuan Zhang and Y. K. Li and Y. Wu and Daya Guo},
|
| 51 |
+
year = 2024,
|
| 52 |
+
eprint = {arXiv:2402.03300},
|
| 53 |
+
}
|
| 54 |
+
|
| 55 |
+
```
|
| 56 |
+
|
| 57 |
+
Cite TRL as:
|
| 58 |
+
|
| 59 |
+
```bibtex
|
| 60 |
+
@misc{vonwerra2022trl,
|
| 61 |
+
title = {{TRL: Transformer Reinforcement Learning}},
|
| 62 |
+
author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallou{\'e}dec},
|
| 63 |
+
year = 2020,
|
| 64 |
+
journal = {GitHub repository},
|
| 65 |
+
publisher = {GitHub},
|
| 66 |
+
howpublished = {\url{https://github.com/huggingface/trl}}
|
| 67 |
+
}
|
| 68 |
+
```
|
added_tokens.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"</think>": 151668,
|
| 3 |
+
"</tool_call>": 151658,
|
| 4 |
+
"</tool_response>": 151666,
|
| 5 |
+
"<think>": 151667,
|
| 6 |
+
"<tool_call>": 151657,
|
| 7 |
+
"<tool_response>": 151665,
|
| 8 |
+
"<|box_end|>": 151649,
|
| 9 |
+
"<|box_start|>": 151648,
|
| 10 |
+
"<|endoftext|>": 151643,
|
| 11 |
+
"<|file_sep|>": 151664,
|
| 12 |
+
"<|fim_middle|>": 151660,
|
| 13 |
+
"<|fim_pad|>": 151662,
|
| 14 |
+
"<|fim_prefix|>": 151659,
|
| 15 |
+
"<|fim_suffix|>": 151661,
|
| 16 |
+
"<|im_end|>": 151645,
|
| 17 |
+
"<|im_start|>": 151644,
|
| 18 |
+
"<|image_pad|>": 151655,
|
| 19 |
+
"<|object_ref_end|>": 151647,
|
| 20 |
+
"<|object_ref_start|>": 151646,
|
| 21 |
+
"<|quad_end|>": 151651,
|
| 22 |
+
"<|quad_start|>": 151650,
|
| 23 |
+
"<|repo_name|>": 151663,
|
| 24 |
+
"<|video_pad|>": 151656,
|
| 25 |
+
"<|vision_end|>": 151653,
|
| 26 |
+
"<|vision_pad|>": 151654,
|
| 27 |
+
"<|vision_start|>": 151652
|
| 28 |
+
}
|
chat_template.jinja
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{%- set image_count = namespace(value=0) %}
|
| 2 |
+
{%- set video_count = namespace(value=0) %}
|
| 3 |
+
{%- macro render_content(content, do_vision_count) %}
|
| 4 |
+
{%- if content is string %}
|
| 5 |
+
{{- content }}
|
| 6 |
+
{%- else %}
|
| 7 |
+
{%- for item in content %}
|
| 8 |
+
{%- if 'image' in item or 'image_url' in item or item.type == 'image' %}
|
| 9 |
+
{%- if do_vision_count %}
|
| 10 |
+
{%- set image_count.value = image_count.value + 1 %}
|
| 11 |
+
{%- endif %}
|
| 12 |
+
{%- if add_vision_id %}Picture {{ image_count.value }}: {% endif -%}
|
| 13 |
+
<|vision_start|><|image_pad|><|vision_end|>
|
| 14 |
+
{%- elif 'video' in item or item.type == 'video' %}
|
| 15 |
+
{%- if do_vision_count %}
|
| 16 |
+
{%- set video_count.value = video_count.value + 1 %}
|
| 17 |
+
{%- endif %}
|
| 18 |
+
{%- if add_vision_id %}Video {{ video_count.value }}: {% endif -%}
|
| 19 |
+
<|vision_start|><|video_pad|><|vision_end|>
|
| 20 |
+
{%- elif 'text' in item %}
|
| 21 |
+
{{- item.text }}
|
| 22 |
+
{%- endif %}
|
| 23 |
+
{%- endfor %}
|
| 24 |
+
{%- endif %}
|
| 25 |
+
{%- endmacro %}
|
| 26 |
+
{%- if tools %}
|
| 27 |
+
{{- '<|im_start|>system\n' }}
|
| 28 |
+
{%- if messages[0].role == 'system' %}
|
| 29 |
+
{{- render_content(messages[0].content, false) + '\n\n' }}
|
| 30 |
+
{%- endif %}
|
| 31 |
+
{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
|
| 32 |
+
{%- for tool in tools %}
|
| 33 |
+
{{- "\n" }}
|
| 34 |
+
{{- tool | tojson }}
|
| 35 |
+
{%- endfor %}
|
| 36 |
+
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
|
| 37 |
+
{%- else %}
|
| 38 |
+
{%- if messages[0].role == 'system' %}
|
| 39 |
+
{{- '<|im_start|>system\n' + render_content(messages[0].content, false) + '<|im_end|>\n' }}
|
| 40 |
+
{%- endif %}
|
| 41 |
+
{%- endif %}
|
| 42 |
+
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
|
| 43 |
+
{%- for message in messages[::-1] %}
|
| 44 |
+
{%- set index = (messages|length - 1) - loop.index0 %}
|
| 45 |
+
{%- if ns.multi_step_tool and message.role == "user" %}
|
| 46 |
+
{%- set content = render_content(message.content, false) %}
|
| 47 |
+
{%- if not(content.startswith('<tool_response>') and content.endswith('</tool_response>')) %}
|
| 48 |
+
{%- set ns.multi_step_tool = false %}
|
| 49 |
+
{%- set ns.last_query_index = index %}
|
| 50 |
+
{%- endif %}
|
| 51 |
+
{%- endif %}
|
| 52 |
+
{%- endfor %}
|
| 53 |
+
{%- for message in messages %}
|
| 54 |
+
{%- set content = render_content(message.content, True) %}
|
| 55 |
+
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
|
| 56 |
+
{{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
|
| 57 |
+
{%- elif message.role == "assistant" %}
|
| 58 |
+
{%- set reasoning_content = '' %}
|
| 59 |
+
{%- if message.reasoning_content is string %}
|
| 60 |
+
{%- set reasoning_content = message.reasoning_content %}
|
| 61 |
+
{%- else %}
|
| 62 |
+
{%- if '</think>' in content %}
|
| 63 |
+
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
|
| 64 |
+
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
|
| 65 |
+
{%- endif %}
|
| 66 |
+
{%- endif %}
|
| 67 |
+
{%- if loop.index0 > ns.last_query_index %}
|
| 68 |
+
{%- if loop.last or (not loop.last and reasoning_content) %}
|
| 69 |
+
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
|
| 70 |
+
{%- else %}
|
| 71 |
+
{{- '<|im_start|>' + message.role + '\n' + content }}
|
| 72 |
+
{%- endif %}
|
| 73 |
+
{%- else %}
|
| 74 |
+
{{- '<|im_start|>' + message.role + '\n' + content }}
|
| 75 |
+
{%- endif %}
|
| 76 |
+
{%- if message.tool_calls %}
|
| 77 |
+
{%- for tool_call in message.tool_calls %}
|
| 78 |
+
{%- if (loop.first and content) or (not loop.first) %}
|
| 79 |
+
{{- '\n' }}
|
| 80 |
+
{%- endif %}
|
| 81 |
+
{%- if tool_call.function %}
|
| 82 |
+
{%- set tool_call = tool_call.function %}
|
| 83 |
+
{%- endif %}
|
| 84 |
+
{{- '<tool_call>\n{"name": "' }}
|
| 85 |
+
{{- tool_call.name }}
|
| 86 |
+
{{- '", "arguments": ' }}
|
| 87 |
+
{%- if tool_call.arguments is string %}
|
| 88 |
+
{{- tool_call.arguments }}
|
| 89 |
+
{%- else %}
|
| 90 |
+
{{- tool_call.arguments | tojson }}
|
| 91 |
+
{%- endif %}
|
| 92 |
+
{{- '}\n</tool_call>' }}
|
| 93 |
+
{%- endfor %}
|
| 94 |
+
{%- endif %}
|
| 95 |
+
{{- '<|im_end|>\n' }}
|
| 96 |
+
{%- elif message.role == "tool" %}
|
| 97 |
+
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
|
| 98 |
+
{{- '<|im_start|>user' }}
|
| 99 |
+
{%- endif %}
|
| 100 |
+
{{- '\n<tool_response>\n' }}
|
| 101 |
+
{{- content }}
|
| 102 |
+
{{- '\n</tool_response>' }}
|
| 103 |
+
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
| 104 |
+
{{- '<|im_end|>\n' }}
|
| 105 |
+
{%- endif %}
|
| 106 |
+
{%- endif %}
|
| 107 |
+
{%- endfor %}
|
| 108 |
+
{%- if add_generation_prompt %}
|
| 109 |
+
{{- '<|im_start|>assistant\n' }}
|
| 110 |
+
{%- endif %}
|
checkpoint-100/added_tokens.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"</think>": 151668,
|
| 3 |
+
"</tool_call>": 151658,
|
| 4 |
+
"</tool_response>": 151666,
|
| 5 |
+
"<think>": 151667,
|
| 6 |
+
"<tool_call>": 151657,
|
| 7 |
+
"<tool_response>": 151665,
|
| 8 |
+
"<|box_end|>": 151649,
|
| 9 |
+
"<|box_start|>": 151648,
|
| 10 |
+
"<|endoftext|>": 151643,
|
| 11 |
+
"<|file_sep|>": 151664,
|
| 12 |
+
"<|fim_middle|>": 151660,
|
| 13 |
+
"<|fim_pad|>": 151662,
|
| 14 |
+
"<|fim_prefix|>": 151659,
|
| 15 |
+
"<|fim_suffix|>": 151661,
|
| 16 |
+
"<|im_end|>": 151645,
|
| 17 |
+
"<|im_start|>": 151644,
|
| 18 |
+
"<|image_pad|>": 151655,
|
| 19 |
+
"<|object_ref_end|>": 151647,
|
| 20 |
+
"<|object_ref_start|>": 151646,
|
| 21 |
+
"<|quad_end|>": 151651,
|
| 22 |
+
"<|quad_start|>": 151650,
|
| 23 |
+
"<|repo_name|>": 151663,
|
| 24 |
+
"<|video_pad|>": 151656,
|
| 25 |
+
"<|vision_end|>": 151653,
|
| 26 |
+
"<|vision_pad|>": 151654,
|
| 27 |
+
"<|vision_start|>": 151652
|
| 28 |
+
}
|
checkpoint-100/chat_template.jinja
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{%- set image_count = namespace(value=0) %}
|
| 2 |
+
{%- set video_count = namespace(value=0) %}
|
| 3 |
+
{%- macro render_content(content, do_vision_count) %}
|
| 4 |
+
{%- if content is string %}
|
| 5 |
+
{{- content }}
|
| 6 |
+
{%- else %}
|
| 7 |
+
{%- for item in content %}
|
| 8 |
+
{%- if 'image' in item or 'image_url' in item or item.type == 'image' %}
|
| 9 |
+
{%- if do_vision_count %}
|
| 10 |
+
{%- set image_count.value = image_count.value + 1 %}
|
| 11 |
+
{%- endif %}
|
| 12 |
+
{%- if add_vision_id %}Picture {{ image_count.value }}: {% endif -%}
|
| 13 |
+
<|vision_start|><|image_pad|><|vision_end|>
|
| 14 |
+
{%- elif 'video' in item or item.type == 'video' %}
|
| 15 |
+
{%- if do_vision_count %}
|
| 16 |
+
{%- set video_count.value = video_count.value + 1 %}
|
| 17 |
+
{%- endif %}
|
| 18 |
+
{%- if add_vision_id %}Video {{ video_count.value }}: {% endif -%}
|
| 19 |
+
<|vision_start|><|video_pad|><|vision_end|>
|
| 20 |
+
{%- elif 'text' in item %}
|
| 21 |
+
{{- item.text }}
|
| 22 |
+
{%- endif %}
|
| 23 |
+
{%- endfor %}
|
| 24 |
+
{%- endif %}
|
| 25 |
+
{%- endmacro %}
|
| 26 |
+
{%- if tools %}
|
| 27 |
+
{{- '<|im_start|>system\n' }}
|
| 28 |
+
{%- if messages[0].role == 'system' %}
|
| 29 |
+
{{- render_content(messages[0].content, false) + '\n\n' }}
|
| 30 |
+
{%- endif %}
|
| 31 |
+
{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
|
| 32 |
+
{%- for tool in tools %}
|
| 33 |
+
{{- "\n" }}
|
| 34 |
+
{{- tool | tojson }}
|
| 35 |
+
{%- endfor %}
|
| 36 |
+
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
|
| 37 |
+
{%- else %}
|
| 38 |
+
{%- if messages[0].role == 'system' %}
|
| 39 |
+
{{- '<|im_start|>system\n' + render_content(messages[0].content, false) + '<|im_end|>\n' }}
|
| 40 |
+
{%- endif %}
|
| 41 |
+
{%- endif %}
|
| 42 |
+
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
|
| 43 |
+
{%- for message in messages[::-1] %}
|
| 44 |
+
{%- set index = (messages|length - 1) - loop.index0 %}
|
| 45 |
+
{%- if ns.multi_step_tool and message.role == "user" %}
|
| 46 |
+
{%- set content = render_content(message.content, false) %}
|
| 47 |
+
{%- if not(content.startswith('<tool_response>') and content.endswith('</tool_response>')) %}
|
| 48 |
+
{%- set ns.multi_step_tool = false %}
|
| 49 |
+
{%- set ns.last_query_index = index %}
|
| 50 |
+
{%- endif %}
|
| 51 |
+
{%- endif %}
|
| 52 |
+
{%- endfor %}
|
| 53 |
+
{%- for message in messages %}
|
| 54 |
+
{%- set content = render_content(message.content, True) %}
|
| 55 |
+
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
|
| 56 |
+
{{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
|
| 57 |
+
{%- elif message.role == "assistant" %}
|
| 58 |
+
{%- set reasoning_content = '' %}
|
| 59 |
+
{%- if message.reasoning_content is string %}
|
| 60 |
+
{%- set reasoning_content = message.reasoning_content %}
|
| 61 |
+
{%- else %}
|
| 62 |
+
{%- if '</think>' in content %}
|
| 63 |
+
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
|
| 64 |
+
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
|
| 65 |
+
{%- endif %}
|
| 66 |
+
{%- endif %}
|
| 67 |
+
{%- if loop.index0 > ns.last_query_index %}
|
| 68 |
+
{%- if loop.last or (not loop.last and reasoning_content) %}
|
| 69 |
+
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
|
| 70 |
+
{%- else %}
|
| 71 |
+
{{- '<|im_start|>' + message.role + '\n' + content }}
|
| 72 |
+
{%- endif %}
|
| 73 |
+
{%- else %}
|
| 74 |
+
{{- '<|im_start|>' + message.role + '\n' + content }}
|
| 75 |
+
{%- endif %}
|
| 76 |
+
{%- if message.tool_calls %}
|
| 77 |
+
{%- for tool_call in message.tool_calls %}
|
| 78 |
+
{%- if (loop.first and content) or (not loop.first) %}
|
| 79 |
+
{{- '\n' }}
|
| 80 |
+
{%- endif %}
|
| 81 |
+
{%- if tool_call.function %}
|
| 82 |
+
{%- set tool_call = tool_call.function %}
|
| 83 |
+
{%- endif %}
|
| 84 |
+
{{- '<tool_call>\n{"name": "' }}
|
| 85 |
+
{{- tool_call.name }}
|
| 86 |
+
{{- '", "arguments": ' }}
|
| 87 |
+
{%- if tool_call.arguments is string %}
|
| 88 |
+
{{- tool_call.arguments }}
|
| 89 |
+
{%- else %}
|
| 90 |
+
{{- tool_call.arguments | tojson }}
|
| 91 |
+
{%- endif %}
|
| 92 |
+
{{- '}\n</tool_call>' }}
|
| 93 |
+
{%- endfor %}
|
| 94 |
+
{%- endif %}
|
| 95 |
+
{{- '<|im_end|>\n' }}
|
| 96 |
+
{%- elif message.role == "tool" %}
|
| 97 |
+
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
|
| 98 |
+
{{- '<|im_start|>user' }}
|
| 99 |
+
{%- endif %}
|
| 100 |
+
{{- '\n<tool_response>\n' }}
|
| 101 |
+
{{- content }}
|
| 102 |
+
{{- '\n</tool_response>' }}
|
| 103 |
+
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
| 104 |
+
{{- '<|im_end|>\n' }}
|
| 105 |
+
{%- endif %}
|
| 106 |
+
{%- endif %}
|
| 107 |
+
{%- endfor %}
|
| 108 |
+
{%- if add_generation_prompt %}
|
| 109 |
+
{{- '<|im_start|>assistant\n' }}
|
| 110 |
+
{%- endif %}
|
checkpoint-100/config.json
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"Qwen3VLForConditionalGeneration"
|
| 4 |
+
],
|
| 5 |
+
"dtype": "bfloat16",
|
| 6 |
+
"eos_token_id": 151645,
|
| 7 |
+
"image_token_id": 151655,
|
| 8 |
+
"model_type": "qwen3_vl",
|
| 9 |
+
"pad_token_id": 151643,
|
| 10 |
+
"text_config": {
|
| 11 |
+
"attention_bias": false,
|
| 12 |
+
"attention_dropout": 0.0,
|
| 13 |
+
"bos_token_id": 151643,
|
| 14 |
+
"dtype": "bfloat16",
|
| 15 |
+
"eos_token_id": 151645,
|
| 16 |
+
"head_dim": 128,
|
| 17 |
+
"hidden_act": "silu",
|
| 18 |
+
"hidden_size": 2560,
|
| 19 |
+
"initializer_range": 0.02,
|
| 20 |
+
"intermediate_size": 9728,
|
| 21 |
+
"max_position_embeddings": 262144,
|
| 22 |
+
"model_type": "qwen3_vl_text",
|
| 23 |
+
"num_attention_heads": 32,
|
| 24 |
+
"num_hidden_layers": 36,
|
| 25 |
+
"num_key_value_heads": 8,
|
| 26 |
+
"rms_norm_eps": 1e-06,
|
| 27 |
+
"rope_scaling": {
|
| 28 |
+
"mrope_interleaved": true,
|
| 29 |
+
"mrope_section": [
|
| 30 |
+
24,
|
| 31 |
+
20,
|
| 32 |
+
20
|
| 33 |
+
],
|
| 34 |
+
"rope_type": "default"
|
| 35 |
+
},
|
| 36 |
+
"rope_theta": 5000000,
|
| 37 |
+
"tie_word_embeddings": true,
|
| 38 |
+
"use_cache": true,
|
| 39 |
+
"vocab_size": 151936
|
| 40 |
+
},
|
| 41 |
+
"tie_word_embeddings": true,
|
| 42 |
+
"transformers_version": "4.57.6",
|
| 43 |
+
"use_cache": false,
|
| 44 |
+
"video_token_id": 151656,
|
| 45 |
+
"vision_config": {
|
| 46 |
+
"deepstack_visual_indexes": [
|
| 47 |
+
5,
|
| 48 |
+
11,
|
| 49 |
+
17
|
| 50 |
+
],
|
| 51 |
+
"depth": 24,
|
| 52 |
+
"dtype": "bfloat16",
|
| 53 |
+
"hidden_act": "gelu_pytorch_tanh",
|
| 54 |
+
"hidden_size": 1024,
|
| 55 |
+
"in_channels": 3,
|
| 56 |
+
"initializer_range": 0.02,
|
| 57 |
+
"intermediate_size": 4096,
|
| 58 |
+
"model_type": "qwen3_vl",
|
| 59 |
+
"num_heads": 16,
|
| 60 |
+
"num_position_embeddings": 2304,
|
| 61 |
+
"out_hidden_size": 2560,
|
| 62 |
+
"patch_size": 16,
|
| 63 |
+
"spatial_merge_size": 2,
|
| 64 |
+
"temporal_patch_size": 2
|
| 65 |
+
},
|
| 66 |
+
"vision_end_token_id": 151653,
|
| 67 |
+
"vision_start_token_id": 151652
|
| 68 |
+
}
|
checkpoint-100/generation_config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"do_sample": true,
|
| 3 |
+
"eos_token_id": [
|
| 4 |
+
151645,
|
| 5 |
+
151645,
|
| 6 |
+
151643
|
| 7 |
+
],
|
| 8 |
+
"pad_token_id": 151643,
|
| 9 |
+
"top_k": 20,
|
| 10 |
+
"top_p": 0.95,
|
| 11 |
+
"transformers_version": "4.57.6"
|
| 12 |
+
}
|
checkpoint-100/merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoint-100/model-00001-of-00002.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6c88d0388e3064e45d449c2d34c44612c2a9f49f0efd9ae9a08f05c8beea2d45
|
| 3 |
+
size 4990497880
|
checkpoint-100/model-00002-of-00002.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a7fdf6cd292be65e7c3b96f690f57d91d8dd43128ce0271a25f996153f01279c
|
| 3 |
+
size 3885221448
|
checkpoint-100/model.safetensors.index.json
ADDED
|
@@ -0,0 +1,721 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"metadata": {
|
| 3 |
+
"total_parameters": 4437815808,
|
| 4 |
+
"total_size": 8875631616
|
| 5 |
+
},
|
| 6 |
+
"weight_map": {
|
| 7 |
+
"model.language_model.embed_tokens.weight": "model-00001-of-00002.safetensors",
|
| 8 |
+
"model.language_model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 9 |
+
"model.language_model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 10 |
+
"model.language_model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 11 |
+
"model.language_model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 12 |
+
"model.language_model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 13 |
+
"model.language_model.layers.0.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 14 |
+
"model.language_model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 15 |
+
"model.language_model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 16 |
+
"model.language_model.layers.0.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 17 |
+
"model.language_model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 18 |
+
"model.language_model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 19 |
+
"model.language_model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 20 |
+
"model.language_model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 21 |
+
"model.language_model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 22 |
+
"model.language_model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 23 |
+
"model.language_model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 24 |
+
"model.language_model.layers.1.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 25 |
+
"model.language_model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 26 |
+
"model.language_model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 27 |
+
"model.language_model.layers.1.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 28 |
+
"model.language_model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 29 |
+
"model.language_model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 30 |
+
"model.language_model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 31 |
+
"model.language_model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 32 |
+
"model.language_model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 33 |
+
"model.language_model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 34 |
+
"model.language_model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 35 |
+
"model.language_model.layers.10.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 36 |
+
"model.language_model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 37 |
+
"model.language_model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 38 |
+
"model.language_model.layers.10.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 39 |
+
"model.language_model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 40 |
+
"model.language_model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 41 |
+
"model.language_model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 42 |
+
"model.language_model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 43 |
+
"model.language_model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 44 |
+
"model.language_model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 45 |
+
"model.language_model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 46 |
+
"model.language_model.layers.11.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 47 |
+
"model.language_model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 48 |
+
"model.language_model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 49 |
+
"model.language_model.layers.11.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 50 |
+
"model.language_model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 51 |
+
"model.language_model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 52 |
+
"model.language_model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 53 |
+
"model.language_model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 54 |
+
"model.language_model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 55 |
+
"model.language_model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 56 |
+
"model.language_model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 57 |
+
"model.language_model.layers.12.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 58 |
+
"model.language_model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 59 |
+
"model.language_model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 60 |
+
"model.language_model.layers.12.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 61 |
+
"model.language_model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 62 |
+
"model.language_model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 63 |
+
"model.language_model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 64 |
+
"model.language_model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 65 |
+
"model.language_model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 66 |
+
"model.language_model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 67 |
+
"model.language_model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 68 |
+
"model.language_model.layers.13.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 69 |
+
"model.language_model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 70 |
+
"model.language_model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 71 |
+
"model.language_model.layers.13.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 72 |
+
"model.language_model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 73 |
+
"model.language_model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 74 |
+
"model.language_model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 75 |
+
"model.language_model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 76 |
+
"model.language_model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 77 |
+
"model.language_model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 78 |
+
"model.language_model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 79 |
+
"model.language_model.layers.14.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 80 |
+
"model.language_model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 81 |
+
"model.language_model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 82 |
+
"model.language_model.layers.14.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 83 |
+
"model.language_model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 84 |
+
"model.language_model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 85 |
+
"model.language_model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 86 |
+
"model.language_model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 87 |
+
"model.language_model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 88 |
+
"model.language_model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 89 |
+
"model.language_model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 90 |
+
"model.language_model.layers.15.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 91 |
+
"model.language_model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 92 |
+
"model.language_model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 93 |
+
"model.language_model.layers.15.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 94 |
+
"model.language_model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 95 |
+
"model.language_model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 96 |
+
"model.language_model.layers.16.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 97 |
+
"model.language_model.layers.16.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 98 |
+
"model.language_model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 99 |
+
"model.language_model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 100 |
+
"model.language_model.layers.16.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 101 |
+
"model.language_model.layers.16.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 102 |
+
"model.language_model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 103 |
+
"model.language_model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 104 |
+
"model.language_model.layers.16.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 105 |
+
"model.language_model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 106 |
+
"model.language_model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 107 |
+
"model.language_model.layers.17.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 108 |
+
"model.language_model.layers.17.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 109 |
+
"model.language_model.layers.17.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 110 |
+
"model.language_model.layers.17.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 111 |
+
"model.language_model.layers.17.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 112 |
+
"model.language_model.layers.17.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 113 |
+
"model.language_model.layers.17.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 114 |
+
"model.language_model.layers.17.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 115 |
+
"model.language_model.layers.17.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 116 |
+
"model.language_model.layers.17.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 117 |
+
"model.language_model.layers.17.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 118 |
+
"model.language_model.layers.18.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 119 |
+
"model.language_model.layers.18.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 120 |
+
"model.language_model.layers.18.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 121 |
+
"model.language_model.layers.18.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 122 |
+
"model.language_model.layers.18.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 123 |
+
"model.language_model.layers.18.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 124 |
+
"model.language_model.layers.18.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 125 |
+
"model.language_model.layers.18.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 126 |
+
"model.language_model.layers.18.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 127 |
+
"model.language_model.layers.18.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 128 |
+
"model.language_model.layers.18.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 129 |
+
"model.language_model.layers.19.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 130 |
+
"model.language_model.layers.19.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 131 |
+
"model.language_model.layers.19.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 132 |
+
"model.language_model.layers.19.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 133 |
+
"model.language_model.layers.19.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 134 |
+
"model.language_model.layers.19.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 135 |
+
"model.language_model.layers.19.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 136 |
+
"model.language_model.layers.19.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 137 |
+
"model.language_model.layers.19.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 138 |
+
"model.language_model.layers.19.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 139 |
+
"model.language_model.layers.19.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 140 |
+
"model.language_model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 141 |
+
"model.language_model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 142 |
+
"model.language_model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 143 |
+
"model.language_model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 144 |
+
"model.language_model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 145 |
+
"model.language_model.layers.2.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 146 |
+
"model.language_model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 147 |
+
"model.language_model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 148 |
+
"model.language_model.layers.2.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 149 |
+
"model.language_model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 150 |
+
"model.language_model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 151 |
+
"model.language_model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 152 |
+
"model.language_model.layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 153 |
+
"model.language_model.layers.20.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 154 |
+
"model.language_model.layers.20.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 155 |
+
"model.language_model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 156 |
+
"model.language_model.layers.20.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 157 |
+
"model.language_model.layers.20.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 158 |
+
"model.language_model.layers.20.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 159 |
+
"model.language_model.layers.20.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 160 |
+
"model.language_model.layers.20.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 161 |
+
"model.language_model.layers.20.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 162 |
+
"model.language_model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 163 |
+
"model.language_model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 164 |
+
"model.language_model.layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 165 |
+
"model.language_model.layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 166 |
+
"model.language_model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 167 |
+
"model.language_model.layers.21.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 168 |
+
"model.language_model.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 169 |
+
"model.language_model.layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 170 |
+
"model.language_model.layers.21.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 171 |
+
"model.language_model.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 172 |
+
"model.language_model.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 173 |
+
"model.language_model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 174 |
+
"model.language_model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 175 |
+
"model.language_model.layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 176 |
+
"model.language_model.layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 177 |
+
"model.language_model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 178 |
+
"model.language_model.layers.22.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 179 |
+
"model.language_model.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 180 |
+
"model.language_model.layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 181 |
+
"model.language_model.layers.22.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 182 |
+
"model.language_model.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 183 |
+
"model.language_model.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 184 |
+
"model.language_model.layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 185 |
+
"model.language_model.layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 186 |
+
"model.language_model.layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 187 |
+
"model.language_model.layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 188 |
+
"model.language_model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 189 |
+
"model.language_model.layers.23.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 190 |
+
"model.language_model.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 191 |
+
"model.language_model.layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 192 |
+
"model.language_model.layers.23.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 193 |
+
"model.language_model.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 194 |
+
"model.language_model.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 195 |
+
"model.language_model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 196 |
+
"model.language_model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 197 |
+
"model.language_model.layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 198 |
+
"model.language_model.layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 199 |
+
"model.language_model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 200 |
+
"model.language_model.layers.24.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 201 |
+
"model.language_model.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 202 |
+
"model.language_model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 203 |
+
"model.language_model.layers.24.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 204 |
+
"model.language_model.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 205 |
+
"model.language_model.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 206 |
+
"model.language_model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 207 |
+
"model.language_model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 208 |
+
"model.language_model.layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 209 |
+
"model.language_model.layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 210 |
+
"model.language_model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 211 |
+
"model.language_model.layers.25.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 212 |
+
"model.language_model.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 213 |
+
"model.language_model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 214 |
+
"model.language_model.layers.25.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 215 |
+
"model.language_model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 216 |
+
"model.language_model.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 217 |
+
"model.language_model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 218 |
+
"model.language_model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 219 |
+
"model.language_model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 220 |
+
"model.language_model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 221 |
+
"model.language_model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 222 |
+
"model.language_model.layers.26.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 223 |
+
"model.language_model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 224 |
+
"model.language_model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 225 |
+
"model.language_model.layers.26.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 226 |
+
"model.language_model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 227 |
+
"model.language_model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 228 |
+
"model.language_model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 229 |
+
"model.language_model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 230 |
+
"model.language_model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 231 |
+
"model.language_model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 232 |
+
"model.language_model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 233 |
+
"model.language_model.layers.27.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 234 |
+
"model.language_model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 235 |
+
"model.language_model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 236 |
+
"model.language_model.layers.27.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 237 |
+
"model.language_model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 238 |
+
"model.language_model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 239 |
+
"model.language_model.layers.28.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 240 |
+
"model.language_model.layers.28.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 241 |
+
"model.language_model.layers.28.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 242 |
+
"model.language_model.layers.28.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 243 |
+
"model.language_model.layers.28.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 244 |
+
"model.language_model.layers.28.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 245 |
+
"model.language_model.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 246 |
+
"model.language_model.layers.28.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 247 |
+
"model.language_model.layers.28.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 248 |
+
"model.language_model.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 249 |
+
"model.language_model.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 250 |
+
"model.language_model.layers.29.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 251 |
+
"model.language_model.layers.29.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 252 |
+
"model.language_model.layers.29.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 253 |
+
"model.language_model.layers.29.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 254 |
+
"model.language_model.layers.29.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 255 |
+
"model.language_model.layers.29.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 256 |
+
"model.language_model.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 257 |
+
"model.language_model.layers.29.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 258 |
+
"model.language_model.layers.29.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 259 |
+
"model.language_model.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 260 |
+
"model.language_model.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 261 |
+
"model.language_model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 262 |
+
"model.language_model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 263 |
+
"model.language_model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 264 |
+
"model.language_model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 265 |
+
"model.language_model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 266 |
+
"model.language_model.layers.3.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 267 |
+
"model.language_model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 268 |
+
"model.language_model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 269 |
+
"model.language_model.layers.3.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 270 |
+
"model.language_model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 271 |
+
"model.language_model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 272 |
+
"model.language_model.layers.30.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 273 |
+
"model.language_model.layers.30.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 274 |
+
"model.language_model.layers.30.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 275 |
+
"model.language_model.layers.30.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 276 |
+
"model.language_model.layers.30.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 277 |
+
"model.language_model.layers.30.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 278 |
+
"model.language_model.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 279 |
+
"model.language_model.layers.30.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 280 |
+
"model.language_model.layers.30.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 281 |
+
"model.language_model.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 282 |
+
"model.language_model.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 283 |
+
"model.language_model.layers.31.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 284 |
+
"model.language_model.layers.31.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 285 |
+
"model.language_model.layers.31.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 286 |
+
"model.language_model.layers.31.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 287 |
+
"model.language_model.layers.31.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 288 |
+
"model.language_model.layers.31.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 289 |
+
"model.language_model.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 290 |
+
"model.language_model.layers.31.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 291 |
+
"model.language_model.layers.31.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 292 |
+
"model.language_model.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 293 |
+
"model.language_model.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 294 |
+
"model.language_model.layers.32.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 295 |
+
"model.language_model.layers.32.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 296 |
+
"model.language_model.layers.32.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 297 |
+
"model.language_model.layers.32.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 298 |
+
"model.language_model.layers.32.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 299 |
+
"model.language_model.layers.32.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 300 |
+
"model.language_model.layers.32.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 301 |
+
"model.language_model.layers.32.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 302 |
+
"model.language_model.layers.32.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 303 |
+
"model.language_model.layers.32.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 304 |
+
"model.language_model.layers.32.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 305 |
+
"model.language_model.layers.33.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 306 |
+
"model.language_model.layers.33.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 307 |
+
"model.language_model.layers.33.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 308 |
+
"model.language_model.layers.33.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 309 |
+
"model.language_model.layers.33.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 310 |
+
"model.language_model.layers.33.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 311 |
+
"model.language_model.layers.33.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 312 |
+
"model.language_model.layers.33.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 313 |
+
"model.language_model.layers.33.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 314 |
+
"model.language_model.layers.33.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 315 |
+
"model.language_model.layers.33.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 316 |
+
"model.language_model.layers.34.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 317 |
+
"model.language_model.layers.34.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 318 |
+
"model.language_model.layers.34.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 319 |
+
"model.language_model.layers.34.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 320 |
+
"model.language_model.layers.34.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 321 |
+
"model.language_model.layers.34.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 322 |
+
"model.language_model.layers.34.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 323 |
+
"model.language_model.layers.34.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 324 |
+
"model.language_model.layers.34.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 325 |
+
"model.language_model.layers.34.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 326 |
+
"model.language_model.layers.34.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 327 |
+
"model.language_model.layers.35.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 328 |
+
"model.language_model.layers.35.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 329 |
+
"model.language_model.layers.35.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 330 |
+
"model.language_model.layers.35.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 331 |
+
"model.language_model.layers.35.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 332 |
+
"model.language_model.layers.35.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 333 |
+
"model.language_model.layers.35.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 334 |
+
"model.language_model.layers.35.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 335 |
+
"model.language_model.layers.35.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 336 |
+
"model.language_model.layers.35.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 337 |
+
"model.language_model.layers.35.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 338 |
+
"model.language_model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 339 |
+
"model.language_model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 340 |
+
"model.language_model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 341 |
+
"model.language_model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 342 |
+
"model.language_model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 343 |
+
"model.language_model.layers.4.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 344 |
+
"model.language_model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 345 |
+
"model.language_model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 346 |
+
"model.language_model.layers.4.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 347 |
+
"model.language_model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 348 |
+
"model.language_model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 349 |
+
"model.language_model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 350 |
+
"model.language_model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 351 |
+
"model.language_model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 352 |
+
"model.language_model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 353 |
+
"model.language_model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 354 |
+
"model.language_model.layers.5.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 355 |
+
"model.language_model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 356 |
+
"model.language_model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 357 |
+
"model.language_model.layers.5.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 358 |
+
"model.language_model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 359 |
+
"model.language_model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 360 |
+
"model.language_model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 361 |
+
"model.language_model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 362 |
+
"model.language_model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 363 |
+
"model.language_model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 364 |
+
"model.language_model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 365 |
+
"model.language_model.layers.6.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 366 |
+
"model.language_model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 367 |
+
"model.language_model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 368 |
+
"model.language_model.layers.6.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 369 |
+
"model.language_model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 370 |
+
"model.language_model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 371 |
+
"model.language_model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 372 |
+
"model.language_model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 373 |
+
"model.language_model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 374 |
+
"model.language_model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 375 |
+
"model.language_model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 376 |
+
"model.language_model.layers.7.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 377 |
+
"model.language_model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 378 |
+
"model.language_model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 379 |
+
"model.language_model.layers.7.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 380 |
+
"model.language_model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 381 |
+
"model.language_model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 382 |
+
"model.language_model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 383 |
+
"model.language_model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 384 |
+
"model.language_model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 385 |
+
"model.language_model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 386 |
+
"model.language_model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 387 |
+
"model.language_model.layers.8.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 388 |
+
"model.language_model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 389 |
+
"model.language_model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 390 |
+
"model.language_model.layers.8.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 391 |
+
"model.language_model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 392 |
+
"model.language_model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 393 |
+
"model.language_model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 394 |
+
"model.language_model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 395 |
+
"model.language_model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 396 |
+
"model.language_model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 397 |
+
"model.language_model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 398 |
+
"model.language_model.layers.9.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 399 |
+
"model.language_model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 400 |
+
"model.language_model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 401 |
+
"model.language_model.layers.9.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 402 |
+
"model.language_model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 403 |
+
"model.language_model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 404 |
+
"model.language_model.norm.weight": "model-00002-of-00002.safetensors",
|
| 405 |
+
"model.visual.blocks.0.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 406 |
+
"model.visual.blocks.0.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 407 |
+
"model.visual.blocks.0.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 408 |
+
"model.visual.blocks.0.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 409 |
+
"model.visual.blocks.0.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 410 |
+
"model.visual.blocks.0.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 411 |
+
"model.visual.blocks.0.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 412 |
+
"model.visual.blocks.0.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 413 |
+
"model.visual.blocks.0.norm1.bias": "model-00001-of-00002.safetensors",
|
| 414 |
+
"model.visual.blocks.0.norm1.weight": "model-00001-of-00002.safetensors",
|
| 415 |
+
"model.visual.blocks.0.norm2.bias": "model-00001-of-00002.safetensors",
|
| 416 |
+
"model.visual.blocks.0.norm2.weight": "model-00001-of-00002.safetensors",
|
| 417 |
+
"model.visual.blocks.1.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 418 |
+
"model.visual.blocks.1.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 419 |
+
"model.visual.blocks.1.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 420 |
+
"model.visual.blocks.1.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 421 |
+
"model.visual.blocks.1.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 422 |
+
"model.visual.blocks.1.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 423 |
+
"model.visual.blocks.1.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 424 |
+
"model.visual.blocks.1.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 425 |
+
"model.visual.blocks.1.norm1.bias": "model-00001-of-00002.safetensors",
|
| 426 |
+
"model.visual.blocks.1.norm1.weight": "model-00001-of-00002.safetensors",
|
| 427 |
+
"model.visual.blocks.1.norm2.bias": "model-00001-of-00002.safetensors",
|
| 428 |
+
"model.visual.blocks.1.norm2.weight": "model-00001-of-00002.safetensors",
|
| 429 |
+
"model.visual.blocks.10.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 430 |
+
"model.visual.blocks.10.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 431 |
+
"model.visual.blocks.10.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 432 |
+
"model.visual.blocks.10.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 433 |
+
"model.visual.blocks.10.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 434 |
+
"model.visual.blocks.10.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 435 |
+
"model.visual.blocks.10.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 436 |
+
"model.visual.blocks.10.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 437 |
+
"model.visual.blocks.10.norm1.bias": "model-00001-of-00002.safetensors",
|
| 438 |
+
"model.visual.blocks.10.norm1.weight": "model-00001-of-00002.safetensors",
|
| 439 |
+
"model.visual.blocks.10.norm2.bias": "model-00001-of-00002.safetensors",
|
| 440 |
+
"model.visual.blocks.10.norm2.weight": "model-00001-of-00002.safetensors",
|
| 441 |
+
"model.visual.blocks.11.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 442 |
+
"model.visual.blocks.11.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 443 |
+
"model.visual.blocks.11.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 444 |
+
"model.visual.blocks.11.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 445 |
+
"model.visual.blocks.11.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 446 |
+
"model.visual.blocks.11.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 447 |
+
"model.visual.blocks.11.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 448 |
+
"model.visual.blocks.11.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 449 |
+
"model.visual.blocks.11.norm1.bias": "model-00001-of-00002.safetensors",
|
| 450 |
+
"model.visual.blocks.11.norm1.weight": "model-00001-of-00002.safetensors",
|
| 451 |
+
"model.visual.blocks.11.norm2.bias": "model-00001-of-00002.safetensors",
|
| 452 |
+
"model.visual.blocks.11.norm2.weight": "model-00001-of-00002.safetensors",
|
| 453 |
+
"model.visual.blocks.12.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 454 |
+
"model.visual.blocks.12.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 455 |
+
"model.visual.blocks.12.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 456 |
+
"model.visual.blocks.12.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 457 |
+
"model.visual.blocks.12.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 458 |
+
"model.visual.blocks.12.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 459 |
+
"model.visual.blocks.12.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 460 |
+
"model.visual.blocks.12.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 461 |
+
"model.visual.blocks.12.norm1.bias": "model-00001-of-00002.safetensors",
|
| 462 |
+
"model.visual.blocks.12.norm1.weight": "model-00001-of-00002.safetensors",
|
| 463 |
+
"model.visual.blocks.12.norm2.bias": "model-00001-of-00002.safetensors",
|
| 464 |
+
"model.visual.blocks.12.norm2.weight": "model-00001-of-00002.safetensors",
|
| 465 |
+
"model.visual.blocks.13.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 466 |
+
"model.visual.blocks.13.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 467 |
+
"model.visual.blocks.13.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 468 |
+
"model.visual.blocks.13.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 469 |
+
"model.visual.blocks.13.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 470 |
+
"model.visual.blocks.13.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 471 |
+
"model.visual.blocks.13.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 472 |
+
"model.visual.blocks.13.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 473 |
+
"model.visual.blocks.13.norm1.bias": "model-00001-of-00002.safetensors",
|
| 474 |
+
"model.visual.blocks.13.norm1.weight": "model-00001-of-00002.safetensors",
|
| 475 |
+
"model.visual.blocks.13.norm2.bias": "model-00001-of-00002.safetensors",
|
| 476 |
+
"model.visual.blocks.13.norm2.weight": "model-00001-of-00002.safetensors",
|
| 477 |
+
"model.visual.blocks.14.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 478 |
+
"model.visual.blocks.14.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 479 |
+
"model.visual.blocks.14.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 480 |
+
"model.visual.blocks.14.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 481 |
+
"model.visual.blocks.14.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 482 |
+
"model.visual.blocks.14.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 483 |
+
"model.visual.blocks.14.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 484 |
+
"model.visual.blocks.14.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 485 |
+
"model.visual.blocks.14.norm1.bias": "model-00001-of-00002.safetensors",
|
| 486 |
+
"model.visual.blocks.14.norm1.weight": "model-00001-of-00002.safetensors",
|
| 487 |
+
"model.visual.blocks.14.norm2.bias": "model-00001-of-00002.safetensors",
|
| 488 |
+
"model.visual.blocks.14.norm2.weight": "model-00001-of-00002.safetensors",
|
| 489 |
+
"model.visual.blocks.15.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 490 |
+
"model.visual.blocks.15.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 491 |
+
"model.visual.blocks.15.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 492 |
+
"model.visual.blocks.15.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 493 |
+
"model.visual.blocks.15.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 494 |
+
"model.visual.blocks.15.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 495 |
+
"model.visual.blocks.15.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 496 |
+
"model.visual.blocks.15.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 497 |
+
"model.visual.blocks.15.norm1.bias": "model-00001-of-00002.safetensors",
|
| 498 |
+
"model.visual.blocks.15.norm1.weight": "model-00001-of-00002.safetensors",
|
| 499 |
+
"model.visual.blocks.15.norm2.bias": "model-00001-of-00002.safetensors",
|
| 500 |
+
"model.visual.blocks.15.norm2.weight": "model-00001-of-00002.safetensors",
|
| 501 |
+
"model.visual.blocks.16.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 502 |
+
"model.visual.blocks.16.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 503 |
+
"model.visual.blocks.16.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 504 |
+
"model.visual.blocks.16.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 505 |
+
"model.visual.blocks.16.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 506 |
+
"model.visual.blocks.16.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 507 |
+
"model.visual.blocks.16.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 508 |
+
"model.visual.blocks.16.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 509 |
+
"model.visual.blocks.16.norm1.bias": "model-00001-of-00002.safetensors",
|
| 510 |
+
"model.visual.blocks.16.norm1.weight": "model-00001-of-00002.safetensors",
|
| 511 |
+
"model.visual.blocks.16.norm2.bias": "model-00001-of-00002.safetensors",
|
| 512 |
+
"model.visual.blocks.16.norm2.weight": "model-00001-of-00002.safetensors",
|
| 513 |
+
"model.visual.blocks.17.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 514 |
+
"model.visual.blocks.17.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 515 |
+
"model.visual.blocks.17.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 516 |
+
"model.visual.blocks.17.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 517 |
+
"model.visual.blocks.17.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 518 |
+
"model.visual.blocks.17.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 519 |
+
"model.visual.blocks.17.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 520 |
+
"model.visual.blocks.17.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 521 |
+
"model.visual.blocks.17.norm1.bias": "model-00001-of-00002.safetensors",
|
| 522 |
+
"model.visual.blocks.17.norm1.weight": "model-00001-of-00002.safetensors",
|
| 523 |
+
"model.visual.blocks.17.norm2.bias": "model-00001-of-00002.safetensors",
|
| 524 |
+
"model.visual.blocks.17.norm2.weight": "model-00001-of-00002.safetensors",
|
| 525 |
+
"model.visual.blocks.18.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 526 |
+
"model.visual.blocks.18.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 527 |
+
"model.visual.blocks.18.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 528 |
+
"model.visual.blocks.18.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 529 |
+
"model.visual.blocks.18.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 530 |
+
"model.visual.blocks.18.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 531 |
+
"model.visual.blocks.18.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 532 |
+
"model.visual.blocks.18.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 533 |
+
"model.visual.blocks.18.norm1.bias": "model-00001-of-00002.safetensors",
|
| 534 |
+
"model.visual.blocks.18.norm1.weight": "model-00001-of-00002.safetensors",
|
| 535 |
+
"model.visual.blocks.18.norm2.bias": "model-00001-of-00002.safetensors",
|
| 536 |
+
"model.visual.blocks.18.norm2.weight": "model-00001-of-00002.safetensors",
|
| 537 |
+
"model.visual.blocks.19.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 538 |
+
"model.visual.blocks.19.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 539 |
+
"model.visual.blocks.19.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 540 |
+
"model.visual.blocks.19.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 541 |
+
"model.visual.blocks.19.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 542 |
+
"model.visual.blocks.19.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 543 |
+
"model.visual.blocks.19.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 544 |
+
"model.visual.blocks.19.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 545 |
+
"model.visual.blocks.19.norm1.bias": "model-00001-of-00002.safetensors",
|
| 546 |
+
"model.visual.blocks.19.norm1.weight": "model-00001-of-00002.safetensors",
|
| 547 |
+
"model.visual.blocks.19.norm2.bias": "model-00001-of-00002.safetensors",
|
| 548 |
+
"model.visual.blocks.19.norm2.weight": "model-00001-of-00002.safetensors",
|
| 549 |
+
"model.visual.blocks.2.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 550 |
+
"model.visual.blocks.2.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 551 |
+
"model.visual.blocks.2.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 552 |
+
"model.visual.blocks.2.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 553 |
+
"model.visual.blocks.2.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 554 |
+
"model.visual.blocks.2.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 555 |
+
"model.visual.blocks.2.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 556 |
+
"model.visual.blocks.2.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 557 |
+
"model.visual.blocks.2.norm1.bias": "model-00001-of-00002.safetensors",
|
| 558 |
+
"model.visual.blocks.2.norm1.weight": "model-00001-of-00002.safetensors",
|
| 559 |
+
"model.visual.blocks.2.norm2.bias": "model-00001-of-00002.safetensors",
|
| 560 |
+
"model.visual.blocks.2.norm2.weight": "model-00001-of-00002.safetensors",
|
| 561 |
+
"model.visual.blocks.20.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 562 |
+
"model.visual.blocks.20.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 563 |
+
"model.visual.blocks.20.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 564 |
+
"model.visual.blocks.20.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 565 |
+
"model.visual.blocks.20.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 566 |
+
"model.visual.blocks.20.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 567 |
+
"model.visual.blocks.20.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 568 |
+
"model.visual.blocks.20.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 569 |
+
"model.visual.blocks.20.norm1.bias": "model-00001-of-00002.safetensors",
|
| 570 |
+
"model.visual.blocks.20.norm1.weight": "model-00001-of-00002.safetensors",
|
| 571 |
+
"model.visual.blocks.20.norm2.bias": "model-00001-of-00002.safetensors",
|
| 572 |
+
"model.visual.blocks.20.norm2.weight": "model-00001-of-00002.safetensors",
|
| 573 |
+
"model.visual.blocks.21.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 574 |
+
"model.visual.blocks.21.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 575 |
+
"model.visual.blocks.21.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 576 |
+
"model.visual.blocks.21.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 577 |
+
"model.visual.blocks.21.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 578 |
+
"model.visual.blocks.21.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 579 |
+
"model.visual.blocks.21.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 580 |
+
"model.visual.blocks.21.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 581 |
+
"model.visual.blocks.21.norm1.bias": "model-00001-of-00002.safetensors",
|
| 582 |
+
"model.visual.blocks.21.norm1.weight": "model-00001-of-00002.safetensors",
|
| 583 |
+
"model.visual.blocks.21.norm2.bias": "model-00001-of-00002.safetensors",
|
| 584 |
+
"model.visual.blocks.21.norm2.weight": "model-00001-of-00002.safetensors",
|
| 585 |
+
"model.visual.blocks.22.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 586 |
+
"model.visual.blocks.22.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 587 |
+
"model.visual.blocks.22.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 588 |
+
"model.visual.blocks.22.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 589 |
+
"model.visual.blocks.22.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 590 |
+
"model.visual.blocks.22.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 591 |
+
"model.visual.blocks.22.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 592 |
+
"model.visual.blocks.22.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 593 |
+
"model.visual.blocks.22.norm1.bias": "model-00001-of-00002.safetensors",
|
| 594 |
+
"model.visual.blocks.22.norm1.weight": "model-00001-of-00002.safetensors",
|
| 595 |
+
"model.visual.blocks.22.norm2.bias": "model-00001-of-00002.safetensors",
|
| 596 |
+
"model.visual.blocks.22.norm2.weight": "model-00001-of-00002.safetensors",
|
| 597 |
+
"model.visual.blocks.23.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 598 |
+
"model.visual.blocks.23.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 599 |
+
"model.visual.blocks.23.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 600 |
+
"model.visual.blocks.23.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 601 |
+
"model.visual.blocks.23.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 602 |
+
"model.visual.blocks.23.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 603 |
+
"model.visual.blocks.23.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 604 |
+
"model.visual.blocks.23.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 605 |
+
"model.visual.blocks.23.norm1.bias": "model-00001-of-00002.safetensors",
|
| 606 |
+
"model.visual.blocks.23.norm1.weight": "model-00001-of-00002.safetensors",
|
| 607 |
+
"model.visual.blocks.23.norm2.bias": "model-00001-of-00002.safetensors",
|
| 608 |
+
"model.visual.blocks.23.norm2.weight": "model-00001-of-00002.safetensors",
|
| 609 |
+
"model.visual.blocks.3.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 610 |
+
"model.visual.blocks.3.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 611 |
+
"model.visual.blocks.3.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 612 |
+
"model.visual.blocks.3.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 613 |
+
"model.visual.blocks.3.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 614 |
+
"model.visual.blocks.3.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 615 |
+
"model.visual.blocks.3.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 616 |
+
"model.visual.blocks.3.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 617 |
+
"model.visual.blocks.3.norm1.bias": "model-00001-of-00002.safetensors",
|
| 618 |
+
"model.visual.blocks.3.norm1.weight": "model-00001-of-00002.safetensors",
|
| 619 |
+
"model.visual.blocks.3.norm2.bias": "model-00001-of-00002.safetensors",
|
| 620 |
+
"model.visual.blocks.3.norm2.weight": "model-00001-of-00002.safetensors",
|
| 621 |
+
"model.visual.blocks.4.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 622 |
+
"model.visual.blocks.4.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 623 |
+
"model.visual.blocks.4.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 624 |
+
"model.visual.blocks.4.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 625 |
+
"model.visual.blocks.4.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 626 |
+
"model.visual.blocks.4.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 627 |
+
"model.visual.blocks.4.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 628 |
+
"model.visual.blocks.4.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 629 |
+
"model.visual.blocks.4.norm1.bias": "model-00001-of-00002.safetensors",
|
| 630 |
+
"model.visual.blocks.4.norm1.weight": "model-00001-of-00002.safetensors",
|
| 631 |
+
"model.visual.blocks.4.norm2.bias": "model-00001-of-00002.safetensors",
|
| 632 |
+
"model.visual.blocks.4.norm2.weight": "model-00001-of-00002.safetensors",
|
| 633 |
+
"model.visual.blocks.5.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 634 |
+
"model.visual.blocks.5.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 635 |
+
"model.visual.blocks.5.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 636 |
+
"model.visual.blocks.5.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 637 |
+
"model.visual.blocks.5.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 638 |
+
"model.visual.blocks.5.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 639 |
+
"model.visual.blocks.5.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 640 |
+
"model.visual.blocks.5.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 641 |
+
"model.visual.blocks.5.norm1.bias": "model-00001-of-00002.safetensors",
|
| 642 |
+
"model.visual.blocks.5.norm1.weight": "model-00001-of-00002.safetensors",
|
| 643 |
+
"model.visual.blocks.5.norm2.bias": "model-00001-of-00002.safetensors",
|
| 644 |
+
"model.visual.blocks.5.norm2.weight": "model-00001-of-00002.safetensors",
|
| 645 |
+
"model.visual.blocks.6.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 646 |
+
"model.visual.blocks.6.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 647 |
+
"model.visual.blocks.6.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 648 |
+
"model.visual.blocks.6.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 649 |
+
"model.visual.blocks.6.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 650 |
+
"model.visual.blocks.6.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 651 |
+
"model.visual.blocks.6.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 652 |
+
"model.visual.blocks.6.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 653 |
+
"model.visual.blocks.6.norm1.bias": "model-00001-of-00002.safetensors",
|
| 654 |
+
"model.visual.blocks.6.norm1.weight": "model-00001-of-00002.safetensors",
|
| 655 |
+
"model.visual.blocks.6.norm2.bias": "model-00001-of-00002.safetensors",
|
| 656 |
+
"model.visual.blocks.6.norm2.weight": "model-00001-of-00002.safetensors",
|
| 657 |
+
"model.visual.blocks.7.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 658 |
+
"model.visual.blocks.7.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 659 |
+
"model.visual.blocks.7.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 660 |
+
"model.visual.blocks.7.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 661 |
+
"model.visual.blocks.7.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 662 |
+
"model.visual.blocks.7.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 663 |
+
"model.visual.blocks.7.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 664 |
+
"model.visual.blocks.7.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 665 |
+
"model.visual.blocks.7.norm1.bias": "model-00001-of-00002.safetensors",
|
| 666 |
+
"model.visual.blocks.7.norm1.weight": "model-00001-of-00002.safetensors",
|
| 667 |
+
"model.visual.blocks.7.norm2.bias": "model-00001-of-00002.safetensors",
|
| 668 |
+
"model.visual.blocks.7.norm2.weight": "model-00001-of-00002.safetensors",
|
| 669 |
+
"model.visual.blocks.8.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 670 |
+
"model.visual.blocks.8.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 671 |
+
"model.visual.blocks.8.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 672 |
+
"model.visual.blocks.8.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 673 |
+
"model.visual.blocks.8.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 674 |
+
"model.visual.blocks.8.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 675 |
+
"model.visual.blocks.8.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 676 |
+
"model.visual.blocks.8.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 677 |
+
"model.visual.blocks.8.norm1.bias": "model-00001-of-00002.safetensors",
|
| 678 |
+
"model.visual.blocks.8.norm1.weight": "model-00001-of-00002.safetensors",
|
| 679 |
+
"model.visual.blocks.8.norm2.bias": "model-00001-of-00002.safetensors",
|
| 680 |
+
"model.visual.blocks.8.norm2.weight": "model-00001-of-00002.safetensors",
|
| 681 |
+
"model.visual.blocks.9.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 682 |
+
"model.visual.blocks.9.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 683 |
+
"model.visual.blocks.9.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 684 |
+
"model.visual.blocks.9.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 685 |
+
"model.visual.blocks.9.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 686 |
+
"model.visual.blocks.9.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 687 |
+
"model.visual.blocks.9.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 688 |
+
"model.visual.blocks.9.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 689 |
+
"model.visual.blocks.9.norm1.bias": "model-00001-of-00002.safetensors",
|
| 690 |
+
"model.visual.blocks.9.norm1.weight": "model-00001-of-00002.safetensors",
|
| 691 |
+
"model.visual.blocks.9.norm2.bias": "model-00001-of-00002.safetensors",
|
| 692 |
+
"model.visual.blocks.9.norm2.weight": "model-00001-of-00002.safetensors",
|
| 693 |
+
"model.visual.deepstack_merger_list.0.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 694 |
+
"model.visual.deepstack_merger_list.0.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 695 |
+
"model.visual.deepstack_merger_list.0.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 696 |
+
"model.visual.deepstack_merger_list.0.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 697 |
+
"model.visual.deepstack_merger_list.0.norm.bias": "model-00001-of-00002.safetensors",
|
| 698 |
+
"model.visual.deepstack_merger_list.0.norm.weight": "model-00001-of-00002.safetensors",
|
| 699 |
+
"model.visual.deepstack_merger_list.1.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 700 |
+
"model.visual.deepstack_merger_list.1.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 701 |
+
"model.visual.deepstack_merger_list.1.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 702 |
+
"model.visual.deepstack_merger_list.1.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 703 |
+
"model.visual.deepstack_merger_list.1.norm.bias": "model-00001-of-00002.safetensors",
|
| 704 |
+
"model.visual.deepstack_merger_list.1.norm.weight": "model-00001-of-00002.safetensors",
|
| 705 |
+
"model.visual.deepstack_merger_list.2.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 706 |
+
"model.visual.deepstack_merger_list.2.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 707 |
+
"model.visual.deepstack_merger_list.2.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 708 |
+
"model.visual.deepstack_merger_list.2.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 709 |
+
"model.visual.deepstack_merger_list.2.norm.bias": "model-00001-of-00002.safetensors",
|
| 710 |
+
"model.visual.deepstack_merger_list.2.norm.weight": "model-00001-of-00002.safetensors",
|
| 711 |
+
"model.visual.merger.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 712 |
+
"model.visual.merger.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 713 |
+
"model.visual.merger.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 714 |
+
"model.visual.merger.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 715 |
+
"model.visual.merger.norm.bias": "model-00001-of-00002.safetensors",
|
| 716 |
+
"model.visual.merger.norm.weight": "model-00001-of-00002.safetensors",
|
| 717 |
+
"model.visual.patch_embed.proj.bias": "model-00001-of-00002.safetensors",
|
| 718 |
+
"model.visual.patch_embed.proj.weight": "model-00001-of-00002.safetensors",
|
| 719 |
+
"model.visual.pos_embed.weight": "model-00001-of-00002.safetensors"
|
| 720 |
+
}
|
| 721 |
+
}
|
checkpoint-100/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:89b3ad373dcf3bd74a3ad7acf5ed97b420e390311b09e78865d242dbfa8f3b22
|
| 3 |
+
size 16090226537
|
checkpoint-100/rng_state_0.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0270e1ad0c9d8c5399b137f572ef3efc1595e214f485e39cd0eeb4b144595f09
|
| 3 |
+
size 15365
|
checkpoint-100/rng_state_1.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f2812d28a777d5dae59a04862e75558192e3e89a0bf8a8e67947b971b4b0b2a4
|
| 3 |
+
size 15365
|
checkpoint-100/rng_state_2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0a1be6535f4ad6e64a75bdb510a9b44e670902b500fdfe2306d9a6bd5eca7f1d
|
| 3 |
+
size 15429
|
checkpoint-100/rng_state_3.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2d9011d072f1bd6627dfe87da75fa202af31eab74b4e5822a0bd17883a8a9e1f
|
| 3 |
+
size 15429
|
checkpoint-100/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2c64505c5b36da3be59e9e5daa60039a6fba9dc5c33f06140c82afef8dd3fe60
|
| 3 |
+
size 1465
|
checkpoint-100/special_tokens_map.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
"<|im_start|>",
|
| 4 |
+
"<|im_end|>",
|
| 5 |
+
"<|object_ref_start|>",
|
| 6 |
+
"<|object_ref_end|>",
|
| 7 |
+
"<|box_start|>",
|
| 8 |
+
"<|box_end|>",
|
| 9 |
+
"<|quad_start|>",
|
| 10 |
+
"<|quad_end|>",
|
| 11 |
+
"<|vision_start|>",
|
| 12 |
+
"<|vision_end|>",
|
| 13 |
+
"<|vision_pad|>",
|
| 14 |
+
"<|image_pad|>",
|
| 15 |
+
"<|video_pad|>"
|
| 16 |
+
],
|
| 17 |
+
"eos_token": {
|
| 18 |
+
"content": "<|im_end|>",
|
| 19 |
+
"lstrip": false,
|
| 20 |
+
"normalized": false,
|
| 21 |
+
"rstrip": false,
|
| 22 |
+
"single_word": false
|
| 23 |
+
},
|
| 24 |
+
"pad_token": {
|
| 25 |
+
"content": "<|endoftext|>",
|
| 26 |
+
"lstrip": false,
|
| 27 |
+
"normalized": false,
|
| 28 |
+
"rstrip": false,
|
| 29 |
+
"single_word": false
|
| 30 |
+
}
|
| 31 |
+
}
|
checkpoint-100/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:67cc0080ffd7555f723f423c27cfef314e1ad9d335c8b79f465c5faba1ed478b
|
| 3 |
+
size 11422821
|
checkpoint-100/tokenizer_config.json
ADDED
|
@@ -0,0 +1,244 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_bos_token": false,
|
| 3 |
+
"add_prefix_space": false,
|
| 4 |
+
"added_tokens_decoder": {
|
| 5 |
+
"151643": {
|
| 6 |
+
"content": "<|endoftext|>",
|
| 7 |
+
"lstrip": false,
|
| 8 |
+
"normalized": false,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false,
|
| 11 |
+
"special": true
|
| 12 |
+
},
|
| 13 |
+
"151644": {
|
| 14 |
+
"content": "<|im_start|>",
|
| 15 |
+
"lstrip": false,
|
| 16 |
+
"normalized": false,
|
| 17 |
+
"rstrip": false,
|
| 18 |
+
"single_word": false,
|
| 19 |
+
"special": true
|
| 20 |
+
},
|
| 21 |
+
"151645": {
|
| 22 |
+
"content": "<|im_end|>",
|
| 23 |
+
"lstrip": false,
|
| 24 |
+
"normalized": false,
|
| 25 |
+
"rstrip": false,
|
| 26 |
+
"single_word": false,
|
| 27 |
+
"special": true
|
| 28 |
+
},
|
| 29 |
+
"151646": {
|
| 30 |
+
"content": "<|object_ref_start|>",
|
| 31 |
+
"lstrip": false,
|
| 32 |
+
"normalized": false,
|
| 33 |
+
"rstrip": false,
|
| 34 |
+
"single_word": false,
|
| 35 |
+
"special": true
|
| 36 |
+
},
|
| 37 |
+
"151647": {
|
| 38 |
+
"content": "<|object_ref_end|>",
|
| 39 |
+
"lstrip": false,
|
| 40 |
+
"normalized": false,
|
| 41 |
+
"rstrip": false,
|
| 42 |
+
"single_word": false,
|
| 43 |
+
"special": true
|
| 44 |
+
},
|
| 45 |
+
"151648": {
|
| 46 |
+
"content": "<|box_start|>",
|
| 47 |
+
"lstrip": false,
|
| 48 |
+
"normalized": false,
|
| 49 |
+
"rstrip": false,
|
| 50 |
+
"single_word": false,
|
| 51 |
+
"special": true
|
| 52 |
+
},
|
| 53 |
+
"151649": {
|
| 54 |
+
"content": "<|box_end|>",
|
| 55 |
+
"lstrip": false,
|
| 56 |
+
"normalized": false,
|
| 57 |
+
"rstrip": false,
|
| 58 |
+
"single_word": false,
|
| 59 |
+
"special": true
|
| 60 |
+
},
|
| 61 |
+
"151650": {
|
| 62 |
+
"content": "<|quad_start|>",
|
| 63 |
+
"lstrip": false,
|
| 64 |
+
"normalized": false,
|
| 65 |
+
"rstrip": false,
|
| 66 |
+
"single_word": false,
|
| 67 |
+
"special": true
|
| 68 |
+
},
|
| 69 |
+
"151651": {
|
| 70 |
+
"content": "<|quad_end|>",
|
| 71 |
+
"lstrip": false,
|
| 72 |
+
"normalized": false,
|
| 73 |
+
"rstrip": false,
|
| 74 |
+
"single_word": false,
|
| 75 |
+
"special": true
|
| 76 |
+
},
|
| 77 |
+
"151652": {
|
| 78 |
+
"content": "<|vision_start|>",
|
| 79 |
+
"lstrip": false,
|
| 80 |
+
"normalized": false,
|
| 81 |
+
"rstrip": false,
|
| 82 |
+
"single_word": false,
|
| 83 |
+
"special": true
|
| 84 |
+
},
|
| 85 |
+
"151653": {
|
| 86 |
+
"content": "<|vision_end|>",
|
| 87 |
+
"lstrip": false,
|
| 88 |
+
"normalized": false,
|
| 89 |
+
"rstrip": false,
|
| 90 |
+
"single_word": false,
|
| 91 |
+
"special": true
|
| 92 |
+
},
|
| 93 |
+
"151654": {
|
| 94 |
+
"content": "<|vision_pad|>",
|
| 95 |
+
"lstrip": false,
|
| 96 |
+
"normalized": false,
|
| 97 |
+
"rstrip": false,
|
| 98 |
+
"single_word": false,
|
| 99 |
+
"special": true
|
| 100 |
+
},
|
| 101 |
+
"151655": {
|
| 102 |
+
"content": "<|image_pad|>",
|
| 103 |
+
"lstrip": false,
|
| 104 |
+
"normalized": false,
|
| 105 |
+
"rstrip": false,
|
| 106 |
+
"single_word": false,
|
| 107 |
+
"special": true
|
| 108 |
+
},
|
| 109 |
+
"151656": {
|
| 110 |
+
"content": "<|video_pad|>",
|
| 111 |
+
"lstrip": false,
|
| 112 |
+
"normalized": false,
|
| 113 |
+
"rstrip": false,
|
| 114 |
+
"single_word": false,
|
| 115 |
+
"special": true
|
| 116 |
+
},
|
| 117 |
+
"151657": {
|
| 118 |
+
"content": "<tool_call>",
|
| 119 |
+
"lstrip": false,
|
| 120 |
+
"normalized": false,
|
| 121 |
+
"rstrip": false,
|
| 122 |
+
"single_word": false,
|
| 123 |
+
"special": false
|
| 124 |
+
},
|
| 125 |
+
"151658": {
|
| 126 |
+
"content": "</tool_call>",
|
| 127 |
+
"lstrip": false,
|
| 128 |
+
"normalized": false,
|
| 129 |
+
"rstrip": false,
|
| 130 |
+
"single_word": false,
|
| 131 |
+
"special": false
|
| 132 |
+
},
|
| 133 |
+
"151659": {
|
| 134 |
+
"content": "<|fim_prefix|>",
|
| 135 |
+
"lstrip": false,
|
| 136 |
+
"normalized": false,
|
| 137 |
+
"rstrip": false,
|
| 138 |
+
"single_word": false,
|
| 139 |
+
"special": false
|
| 140 |
+
},
|
| 141 |
+
"151660": {
|
| 142 |
+
"content": "<|fim_middle|>",
|
| 143 |
+
"lstrip": false,
|
| 144 |
+
"normalized": false,
|
| 145 |
+
"rstrip": false,
|
| 146 |
+
"single_word": false,
|
| 147 |
+
"special": false
|
| 148 |
+
},
|
| 149 |
+
"151661": {
|
| 150 |
+
"content": "<|fim_suffix|>",
|
| 151 |
+
"lstrip": false,
|
| 152 |
+
"normalized": false,
|
| 153 |
+
"rstrip": false,
|
| 154 |
+
"single_word": false,
|
| 155 |
+
"special": false
|
| 156 |
+
},
|
| 157 |
+
"151662": {
|
| 158 |
+
"content": "<|fim_pad|>",
|
| 159 |
+
"lstrip": false,
|
| 160 |
+
"normalized": false,
|
| 161 |
+
"rstrip": false,
|
| 162 |
+
"single_word": false,
|
| 163 |
+
"special": false
|
| 164 |
+
},
|
| 165 |
+
"151663": {
|
| 166 |
+
"content": "<|repo_name|>",
|
| 167 |
+
"lstrip": false,
|
| 168 |
+
"normalized": false,
|
| 169 |
+
"rstrip": false,
|
| 170 |
+
"single_word": false,
|
| 171 |
+
"special": false
|
| 172 |
+
},
|
| 173 |
+
"151664": {
|
| 174 |
+
"content": "<|file_sep|>",
|
| 175 |
+
"lstrip": false,
|
| 176 |
+
"normalized": false,
|
| 177 |
+
"rstrip": false,
|
| 178 |
+
"single_word": false,
|
| 179 |
+
"special": false
|
| 180 |
+
},
|
| 181 |
+
"151665": {
|
| 182 |
+
"content": "<tool_response>",
|
| 183 |
+
"lstrip": false,
|
| 184 |
+
"normalized": false,
|
| 185 |
+
"rstrip": false,
|
| 186 |
+
"single_word": false,
|
| 187 |
+
"special": false
|
| 188 |
+
},
|
| 189 |
+
"151666": {
|
| 190 |
+
"content": "</tool_response>",
|
| 191 |
+
"lstrip": false,
|
| 192 |
+
"normalized": false,
|
| 193 |
+
"rstrip": false,
|
| 194 |
+
"single_word": false,
|
| 195 |
+
"special": false
|
| 196 |
+
},
|
| 197 |
+
"151667": {
|
| 198 |
+
"content": "<think>",
|
| 199 |
+
"lstrip": false,
|
| 200 |
+
"normalized": false,
|
| 201 |
+
"rstrip": false,
|
| 202 |
+
"single_word": false,
|
| 203 |
+
"special": false
|
| 204 |
+
},
|
| 205 |
+
"151668": {
|
| 206 |
+
"content": "</think>",
|
| 207 |
+
"lstrip": false,
|
| 208 |
+
"normalized": false,
|
| 209 |
+
"rstrip": false,
|
| 210 |
+
"single_word": false,
|
| 211 |
+
"special": false
|
| 212 |
+
}
|
| 213 |
+
},
|
| 214 |
+
"additional_special_tokens": [
|
| 215 |
+
"<|im_start|>",
|
| 216 |
+
"<|im_end|>",
|
| 217 |
+
"<|object_ref_start|>",
|
| 218 |
+
"<|object_ref_end|>",
|
| 219 |
+
"<|box_start|>",
|
| 220 |
+
"<|box_end|>",
|
| 221 |
+
"<|quad_start|>",
|
| 222 |
+
"<|quad_end|>",
|
| 223 |
+
"<|vision_start|>",
|
| 224 |
+
"<|vision_end|>",
|
| 225 |
+
"<|vision_pad|>",
|
| 226 |
+
"<|image_pad|>",
|
| 227 |
+
"<|video_pad|>"
|
| 228 |
+
],
|
| 229 |
+
"bos_token": null,
|
| 230 |
+
"clean_up_tokenization_spaces": false,
|
| 231 |
+
"eos_token": "<|im_end|>",
|
| 232 |
+
"errors": "replace",
|
| 233 |
+
"extra_special_tokens": {},
|
| 234 |
+
"max_length": null,
|
| 235 |
+
"model_max_length": 262144,
|
| 236 |
+
"pad_to_multiple_of": null,
|
| 237 |
+
"pad_token": "<|endoftext|>",
|
| 238 |
+
"pad_token_type_id": 0,
|
| 239 |
+
"padding_side": "left",
|
| 240 |
+
"processor_class": "Qwen3VLProcessor",
|
| 241 |
+
"split_special_tokens": false,
|
| 242 |
+
"tokenizer_class": "Qwen2Tokenizer",
|
| 243 |
+
"unk_token": null
|
| 244 |
+
}
|
checkpoint-100/trainer_state.json
ADDED
|
@@ -0,0 +1,342 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": null,
|
| 3 |
+
"best_metric": null,
|
| 4 |
+
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.021431633090441493,
|
| 6 |
+
"eval_steps": 72,
|
| 7 |
+
"global_step": 100,
|
| 8 |
+
"is_hyper_param_search": false,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"clip_ratio/high_max": 0.0,
|
| 14 |
+
"clip_ratio/high_mean": 0.0,
|
| 15 |
+
"clip_ratio/low_mean": 0.0,
|
| 16 |
+
"clip_ratio/low_min": 0.0,
|
| 17 |
+
"clip_ratio/region_mean": 0.0,
|
| 18 |
+
"completions/clipped_ratio": 0.0,
|
| 19 |
+
"completions/max_length": 728.3,
|
| 20 |
+
"completions/max_terminated_length": 728.3,
|
| 21 |
+
"completions/mean_length": 690.4875061035157,
|
| 22 |
+
"completions/mean_terminated_length": 690.4875061035157,
|
| 23 |
+
"completions/min_length": 651.7,
|
| 24 |
+
"completions/min_terminated_length": 651.7,
|
| 25 |
+
"entropy": 0.5155309438705444,
|
| 26 |
+
"epoch": 0.0021431633090441492,
|
| 27 |
+
"frac_reward_zero_std": 0.0,
|
| 28 |
+
"grad_norm": 0.953125,
|
| 29 |
+
"kl": 0.0009624507569242269,
|
| 30 |
+
"learning_rate": 9.691780821917808e-07,
|
| 31 |
+
"loss": -0.0006,
|
| 32 |
+
"num_tokens": 997482.0,
|
| 33 |
+
"reward": 0.017473320267163216,
|
| 34 |
+
"reward_std": 0.03684660438448191,
|
| 35 |
+
"rewards/ADEnReward/mean": 0.017473319987766446,
|
| 36 |
+
"rewards/ADEnReward/std": 0.04930391618981957,
|
| 37 |
+
"step": 10,
|
| 38 |
+
"step_time": 30.737299674004316
|
| 39 |
+
},
|
| 40 |
+
{
|
| 41 |
+
"clip_ratio/high_max": 0.0,
|
| 42 |
+
"clip_ratio/high_mean": 0.0,
|
| 43 |
+
"clip_ratio/low_mean": 0.0,
|
| 44 |
+
"clip_ratio/low_min": 0.0,
|
| 45 |
+
"clip_ratio/region_mean": 0.0,
|
| 46 |
+
"completions/clipped_ratio": 0.0,
|
| 47 |
+
"completions/max_length": 726.3,
|
| 48 |
+
"completions/max_terminated_length": 726.3,
|
| 49 |
+
"completions/mean_length": 689.114599609375,
|
| 50 |
+
"completions/mean_terminated_length": 689.114599609375,
|
| 51 |
+
"completions/min_length": 654.4,
|
| 52 |
+
"completions/min_terminated_length": 654.4,
|
| 53 |
+
"entropy": 0.5154049336910248,
|
| 54 |
+
"epoch": 0.0042863266180882984,
|
| 55 |
+
"frac_reward_zero_std": 0.0,
|
| 56 |
+
"grad_norm": 1.0859375,
|
| 57 |
+
"kl": 0.001545107248239219,
|
| 58 |
+
"learning_rate": 9.349315068493149e-07,
|
| 59 |
+
"loss": -0.002,
|
| 60 |
+
"num_tokens": 1994385.0,
|
| 61 |
+
"reward": 0.02515518048312515,
|
| 62 |
+
"reward_std": 0.051526497805025426,
|
| 63 |
+
"rewards/ADEnReward/mean": 0.025155179359717294,
|
| 64 |
+
"rewards/ADEnReward/std": 0.06882240404374898,
|
| 65 |
+
"step": 20,
|
| 66 |
+
"step_time": 30.774630016833544
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"clip_ratio/high_max": 0.0,
|
| 70 |
+
"clip_ratio/high_mean": 0.0,
|
| 71 |
+
"clip_ratio/low_mean": 0.0,
|
| 72 |
+
"clip_ratio/low_min": 0.0,
|
| 73 |
+
"clip_ratio/region_mean": 0.0,
|
| 74 |
+
"completions/clipped_ratio": 0.0,
|
| 75 |
+
"completions/max_length": 727.7,
|
| 76 |
+
"completions/max_terminated_length": 727.7,
|
| 77 |
+
"completions/mean_length": 687.0104431152344,
|
| 78 |
+
"completions/mean_terminated_length": 687.0104431152344,
|
| 79 |
+
"completions/min_length": 647.7,
|
| 80 |
+
"completions/min_terminated_length": 647.7,
|
| 81 |
+
"entropy": 0.5073866218328476,
|
| 82 |
+
"epoch": 0.006429489927132447,
|
| 83 |
+
"frac_reward_zero_std": 0.0,
|
| 84 |
+
"grad_norm": 0.84375,
|
| 85 |
+
"kl": 0.00105394265265204,
|
| 86 |
+
"learning_rate": 9.006849315068494e-07,
|
| 87 |
+
"loss": -0.001,
|
| 88 |
+
"num_tokens": 2989654.0,
|
| 89 |
+
"reward": 0.02013225699774921,
|
| 90 |
+
"reward_std": 0.039358591521158816,
|
| 91 |
+
"rewards/ADEnReward/mean": 0.020132256811484693,
|
| 92 |
+
"rewards/ADEnReward/std": 0.054149537533521655,
|
| 93 |
+
"step": 30,
|
| 94 |
+
"step_time": 30.921874072402716
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
"clip_ratio/high_max": 0.0,
|
| 98 |
+
"clip_ratio/high_mean": 0.0,
|
| 99 |
+
"clip_ratio/low_mean": 0.0,
|
| 100 |
+
"clip_ratio/low_min": 0.0,
|
| 101 |
+
"clip_ratio/region_mean": 0.0,
|
| 102 |
+
"completions/clipped_ratio": 0.0,
|
| 103 |
+
"completions/max_length": 730.3,
|
| 104 |
+
"completions/max_terminated_length": 730.3,
|
| 105 |
+
"completions/mean_length": 691.108349609375,
|
| 106 |
+
"completions/mean_terminated_length": 691.108349609375,
|
| 107 |
+
"completions/min_length": 655.2,
|
| 108 |
+
"completions/min_terminated_length": 655.2,
|
| 109 |
+
"entropy": 0.5206563144922256,
|
| 110 |
+
"epoch": 0.008572653236176597,
|
| 111 |
+
"frac_reward_zero_std": 0.0,
|
| 112 |
+
"grad_norm": 0.8515625,
|
| 113 |
+
"kl": 0.001020824775332585,
|
| 114 |
+
"learning_rate": 8.664383561643836e-07,
|
| 115 |
+
"loss": -0.0003,
|
| 116 |
+
"num_tokens": 3987498.0,
|
| 117 |
+
"reward": 0.022847729618661106,
|
| 118 |
+
"reward_std": 0.05116785345599055,
|
| 119 |
+
"rewards/ADEnReward/mean": 0.022847728175111114,
|
| 120 |
+
"rewards/ADEnReward/std": 0.06853441912680865,
|
| 121 |
+
"step": 40,
|
| 122 |
+
"step_time": 30.637446269392967
|
| 123 |
+
},
|
| 124 |
+
{
|
| 125 |
+
"clip_ratio/high_max": 0.0,
|
| 126 |
+
"clip_ratio/high_mean": 0.0,
|
| 127 |
+
"clip_ratio/low_mean": 0.0,
|
| 128 |
+
"clip_ratio/low_min": 0.0,
|
| 129 |
+
"clip_ratio/region_mean": 0.0,
|
| 130 |
+
"completions/clipped_ratio": 0.0,
|
| 131 |
+
"completions/max_length": 727.2,
|
| 132 |
+
"completions/max_terminated_length": 727.2,
|
| 133 |
+
"completions/mean_length": 687.1041870117188,
|
| 134 |
+
"completions/mean_terminated_length": 687.1041870117188,
|
| 135 |
+
"completions/min_length": 655.1,
|
| 136 |
+
"completions/min_terminated_length": 655.1,
|
| 137 |
+
"entropy": 0.5212983191013336,
|
| 138 |
+
"epoch": 0.010715816545220747,
|
| 139 |
+
"frac_reward_zero_std": 0.0,
|
| 140 |
+
"grad_norm": 1.0,
|
| 141 |
+
"kl": 0.0006436160067096353,
|
| 142 |
+
"learning_rate": 8.321917808219178e-07,
|
| 143 |
+
"loss": -0.0,
|
| 144 |
+
"num_tokens": 4983244.0,
|
| 145 |
+
"reward": 0.02881853673607111,
|
| 146 |
+
"reward_std": 0.06065227556973696,
|
| 147 |
+
"rewards/ADEnReward/mean": 0.028818535897880793,
|
| 148 |
+
"rewards/ADEnReward/std": 0.07447688719257713,
|
| 149 |
+
"step": 50,
|
| 150 |
+
"step_time": 30.621672691404818
|
| 151 |
+
},
|
| 152 |
+
{
|
| 153 |
+
"clip_ratio/high_max": 0.0,
|
| 154 |
+
"clip_ratio/high_mean": 0.0,
|
| 155 |
+
"clip_ratio/low_mean": 0.0,
|
| 156 |
+
"clip_ratio/low_min": 0.0,
|
| 157 |
+
"clip_ratio/region_mean": 0.0,
|
| 158 |
+
"completions/clipped_ratio": 0.0,
|
| 159 |
+
"completions/max_length": 720.9,
|
| 160 |
+
"completions/max_terminated_length": 720.9,
|
| 161 |
+
"completions/mean_length": 689.1104431152344,
|
| 162 |
+
"completions/mean_terminated_length": 689.1104431152344,
|
| 163 |
+
"completions/min_length": 658.7,
|
| 164 |
+
"completions/min_terminated_length": 658.7,
|
| 165 |
+
"entropy": 0.5353182137012482,
|
| 166 |
+
"epoch": 0.012858979854264894,
|
| 167 |
+
"frac_reward_zero_std": 0.0,
|
| 168 |
+
"grad_norm": 0.88671875,
|
| 169 |
+
"kl": 0.0006802360760048032,
|
| 170 |
+
"learning_rate": 7.97945205479452e-07,
|
| 171 |
+
"loss": 0.0004,
|
| 172 |
+
"num_tokens": 5979953.0,
|
| 173 |
+
"reward": 0.021031666733324527,
|
| 174 |
+
"reward_std": 0.04476789850741625,
|
| 175 |
+
"rewards/ADEnReward/mean": 0.02103166626766324,
|
| 176 |
+
"rewards/ADEnReward/std": 0.060725963488221166,
|
| 177 |
+
"step": 60,
|
| 178 |
+
"step_time": 30.693003302812578
|
| 179 |
+
},
|
| 180 |
+
{
|
| 181 |
+
"clip_ratio/high_max": 0.0,
|
| 182 |
+
"clip_ratio/high_mean": 0.0,
|
| 183 |
+
"clip_ratio/low_mean": 0.0,
|
| 184 |
+
"clip_ratio/low_min": 0.0,
|
| 185 |
+
"clip_ratio/region_mean": 0.0,
|
| 186 |
+
"completions/clipped_ratio": 0.0,
|
| 187 |
+
"completions/max_length": 729.1,
|
| 188 |
+
"completions/max_terminated_length": 729.1,
|
| 189 |
+
"completions/mean_length": 690.0625183105469,
|
| 190 |
+
"completions/mean_terminated_length": 690.0625183105469,
|
| 191 |
+
"completions/min_length": 655.5,
|
| 192 |
+
"completions/min_terminated_length": 655.5,
|
| 193 |
+
"entropy": 0.5152539879083633,
|
| 194 |
+
"epoch": 0.015002143163309044,
|
| 195 |
+
"frac_reward_zero_std": 0.0,
|
| 196 |
+
"grad_norm": 0.71875,
|
| 197 |
+
"kl": 0.0006728454434778541,
|
| 198 |
+
"learning_rate": 7.636986301369863e-07,
|
| 199 |
+
"loss": 0.0006,
|
| 200 |
+
"num_tokens": 6977375.0,
|
| 201 |
+
"reward": 0.02499701709020883,
|
| 202 |
+
"reward_std": 0.052732100058346984,
|
| 203 |
+
"rewards/ADEnReward/mean": 0.02499701699707657,
|
| 204 |
+
"rewards/ADEnReward/std": 0.06445601325249299,
|
| 205 |
+
"step": 70,
|
| 206 |
+
"step_time": 30.69759728834033
|
| 207 |
+
},
|
| 208 |
+
{
|
| 209 |
+
"epoch": 0.015430775825117874,
|
| 210 |
+
"eval_clip_ratio/high_max": 0.0,
|
| 211 |
+
"eval_clip_ratio/high_mean": 0.0,
|
| 212 |
+
"eval_clip_ratio/low_mean": 0.0,
|
| 213 |
+
"eval_clip_ratio/low_min": 0.0,
|
| 214 |
+
"eval_clip_ratio/region_mean": 0.0,
|
| 215 |
+
"eval_completions/clipped_ratio": 0.0003109452828987321,
|
| 216 |
+
"eval_completions/max_length": 734.5223880597015,
|
| 217 |
+
"eval_completions/max_terminated_length": 730.1343283582089,
|
| 218 |
+
"eval_completions/mean_length": 691.7577942805503,
|
| 219 |
+
"eval_completions/mean_terminated_length": 691.6533622172342,
|
| 220 |
+
"eval_completions/min_length": 655.8805970149253,
|
| 221 |
+
"eval_completions/min_terminated_length": 655.8805970149253,
|
| 222 |
+
"eval_entropy": 0.5390982583387575,
|
| 223 |
+
"eval_frac_reward_zero_std": 0.009950249052759427,
|
| 224 |
+
"eval_kl": 0.0007209435779490132,
|
| 225 |
+
"eval_loss": -0.0006809970363974571,
|
| 226 |
+
"eval_num_tokens": 7175890.0,
|
| 227 |
+
"eval_reward": 0.014476228026245763,
|
| 228 |
+
"eval_reward_std": 0.02974726758991132,
|
| 229 |
+
"eval_rewards/ADEnReward/mean": 0.01447622790266308,
|
| 230 |
+
"eval_rewards/ADEnReward/std": 0.04060708046908171,
|
| 231 |
+
"eval_runtime": 1940.8413,
|
| 232 |
+
"eval_samples_per_second": 0.103,
|
| 233 |
+
"eval_steps_per_second": 0.003,
|
| 234 |
+
"step": 72
|
| 235 |
+
},
|
| 236 |
+
{
|
| 237 |
+
"clip_ratio/high_max": 0.0,
|
| 238 |
+
"clip_ratio/high_mean": 0.0,
|
| 239 |
+
"clip_ratio/low_mean": 0.0,
|
| 240 |
+
"clip_ratio/low_min": 0.0,
|
| 241 |
+
"clip_ratio/region_mean": 0.0,
|
| 242 |
+
"completions/clipped_ratio": 0.0,
|
| 243 |
+
"completions/max_length": 724.8,
|
| 244 |
+
"completions/max_terminated_length": 724.8,
|
| 245 |
+
"completions/mean_length": 685.8041809082031,
|
| 246 |
+
"completions/mean_terminated_length": 685.8041809082031,
|
| 247 |
+
"completions/min_length": 646.9,
|
| 248 |
+
"completions/min_terminated_length": 646.9,
|
| 249 |
+
"entropy": 0.5170553356409073,
|
| 250 |
+
"epoch": 0.017145306472353194,
|
| 251 |
+
"frac_reward_zero_std": 0.0,
|
| 252 |
+
"grad_norm": 0.76953125,
|
| 253 |
+
"kl": 0.0006296096777077764,
|
| 254 |
+
"learning_rate": 7.294520547945205e-07,
|
| 255 |
+
"loss": -0.0001,
|
| 256 |
+
"num_tokens": 7972113.0,
|
| 257 |
+
"reward": 0.027504462655633687,
|
| 258 |
+
"reward_std": 0.05410240553319454,
|
| 259 |
+
"rewards/ADEnReward/mean": 0.027504462469369172,
|
| 260 |
+
"rewards/ADEnReward/std": 0.07011906299740076,
|
| 261 |
+
"step": 80,
|
| 262 |
+
"step_time": 29.781939852237702
|
| 263 |
+
},
|
| 264 |
+
{
|
| 265 |
+
"clip_ratio/high_max": 0.0,
|
| 266 |
+
"clip_ratio/high_mean": 0.0,
|
| 267 |
+
"clip_ratio/low_mean": 0.0,
|
| 268 |
+
"clip_ratio/low_min": 0.0,
|
| 269 |
+
"clip_ratio/region_mean": 0.0,
|
| 270 |
+
"completions/clipped_ratio": 0.0,
|
| 271 |
+
"completions/max_length": 729.1,
|
| 272 |
+
"completions/max_terminated_length": 729.1,
|
| 273 |
+
"completions/mean_length": 692.3312622070313,
|
| 274 |
+
"completions/mean_terminated_length": 692.3312622070313,
|
| 275 |
+
"completions/min_length": 661.3,
|
| 276 |
+
"completions/min_terminated_length": 661.3,
|
| 277 |
+
"entropy": 0.5159202754497528,
|
| 278 |
+
"epoch": 0.01928846978139734,
|
| 279 |
+
"frac_reward_zero_std": 0.0,
|
| 280 |
+
"grad_norm": 0.95703125,
|
| 281 |
+
"kl": 0.0007557698409073055,
|
| 282 |
+
"learning_rate": 6.952054794520548e-07,
|
| 283 |
+
"loss": 0.0003,
|
| 284 |
+
"num_tokens": 8971120.0,
|
| 285 |
+
"reward": 0.031002284376882017,
|
| 286 |
+
"reward_std": 0.058332843240350485,
|
| 287 |
+
"rewards/ADEnReward/mean": 0.03100228389375843,
|
| 288 |
+
"rewards/ADEnReward/std": 0.06421787270810456,
|
| 289 |
+
"step": 90,
|
| 290 |
+
"step_time": 29.85468419864774
|
| 291 |
+
},
|
| 292 |
+
{
|
| 293 |
+
"clip_ratio/high_max": 0.0,
|
| 294 |
+
"clip_ratio/high_mean": 0.0,
|
| 295 |
+
"clip_ratio/low_mean": 0.0,
|
| 296 |
+
"clip_ratio/low_min": 0.0,
|
| 297 |
+
"clip_ratio/region_mean": 0.0,
|
| 298 |
+
"completions/clipped_ratio": 0.0,
|
| 299 |
+
"completions/max_length": 728.2,
|
| 300 |
+
"completions/max_terminated_length": 728.2,
|
| 301 |
+
"completions/mean_length": 689.6791931152344,
|
| 302 |
+
"completions/mean_terminated_length": 689.6791931152344,
|
| 303 |
+
"completions/min_length": 655.0,
|
| 304 |
+
"completions/min_terminated_length": 655.0,
|
| 305 |
+
"entropy": 0.524771922826767,
|
| 306 |
+
"epoch": 0.021431633090441493,
|
| 307 |
+
"frac_reward_zero_std": 0.0,
|
| 308 |
+
"grad_norm": 1.375,
|
| 309 |
+
"kl": 0.000729308504378423,
|
| 310 |
+
"learning_rate": 6.60958904109589e-07,
|
| 311 |
+
"loss": -0.0009,
|
| 312 |
+
"num_tokens": 9968310.0,
|
| 313 |
+
"reward": 0.03060391815379262,
|
| 314 |
+
"reward_std": 0.04884043680503965,
|
| 315 |
+
"rewards/ADEnReward/mean": 0.03060391787439585,
|
| 316 |
+
"rewards/ADEnReward/std": 0.07001540651544928,
|
| 317 |
+
"step": 100,
|
| 318 |
+
"step_time": 29.800767001509666
|
| 319 |
+
}
|
| 320 |
+
],
|
| 321 |
+
"logging_steps": 10,
|
| 322 |
+
"max_steps": 292,
|
| 323 |
+
"num_input_tokens_seen": 9968310,
|
| 324 |
+
"num_train_epochs": 1,
|
| 325 |
+
"save_steps": 100,
|
| 326 |
+
"stateful_callbacks": {
|
| 327 |
+
"TrainerControl": {
|
| 328 |
+
"args": {
|
| 329 |
+
"should_epoch_stop": false,
|
| 330 |
+
"should_evaluate": false,
|
| 331 |
+
"should_log": false,
|
| 332 |
+
"should_save": true,
|
| 333 |
+
"should_training_stop": false
|
| 334 |
+
},
|
| 335 |
+
"attributes": {}
|
| 336 |
+
}
|
| 337 |
+
},
|
| 338 |
+
"total_flos": 0.0,
|
| 339 |
+
"train_batch_size": 12,
|
| 340 |
+
"trial_name": null,
|
| 341 |
+
"trial_params": null
|
| 342 |
+
}
|
checkpoint-100/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fbde34589936c15afd6048c963f9277d5d47635a7f4b4ddffd47df13e59cdd3c
|
| 3 |
+
size 7569
|
checkpoint-100/vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoint-200/added_tokens.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"</think>": 151668,
|
| 3 |
+
"</tool_call>": 151658,
|
| 4 |
+
"</tool_response>": 151666,
|
| 5 |
+
"<think>": 151667,
|
| 6 |
+
"<tool_call>": 151657,
|
| 7 |
+
"<tool_response>": 151665,
|
| 8 |
+
"<|box_end|>": 151649,
|
| 9 |
+
"<|box_start|>": 151648,
|
| 10 |
+
"<|endoftext|>": 151643,
|
| 11 |
+
"<|file_sep|>": 151664,
|
| 12 |
+
"<|fim_middle|>": 151660,
|
| 13 |
+
"<|fim_pad|>": 151662,
|
| 14 |
+
"<|fim_prefix|>": 151659,
|
| 15 |
+
"<|fim_suffix|>": 151661,
|
| 16 |
+
"<|im_end|>": 151645,
|
| 17 |
+
"<|im_start|>": 151644,
|
| 18 |
+
"<|image_pad|>": 151655,
|
| 19 |
+
"<|object_ref_end|>": 151647,
|
| 20 |
+
"<|object_ref_start|>": 151646,
|
| 21 |
+
"<|quad_end|>": 151651,
|
| 22 |
+
"<|quad_start|>": 151650,
|
| 23 |
+
"<|repo_name|>": 151663,
|
| 24 |
+
"<|video_pad|>": 151656,
|
| 25 |
+
"<|vision_end|>": 151653,
|
| 26 |
+
"<|vision_pad|>": 151654,
|
| 27 |
+
"<|vision_start|>": 151652
|
| 28 |
+
}
|
checkpoint-200/chat_template.jinja
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{%- set image_count = namespace(value=0) %}
|
| 2 |
+
{%- set video_count = namespace(value=0) %}
|
| 3 |
+
{%- macro render_content(content, do_vision_count) %}
|
| 4 |
+
{%- if content is string %}
|
| 5 |
+
{{- content }}
|
| 6 |
+
{%- else %}
|
| 7 |
+
{%- for item in content %}
|
| 8 |
+
{%- if 'image' in item or 'image_url' in item or item.type == 'image' %}
|
| 9 |
+
{%- if do_vision_count %}
|
| 10 |
+
{%- set image_count.value = image_count.value + 1 %}
|
| 11 |
+
{%- endif %}
|
| 12 |
+
{%- if add_vision_id %}Picture {{ image_count.value }}: {% endif -%}
|
| 13 |
+
<|vision_start|><|image_pad|><|vision_end|>
|
| 14 |
+
{%- elif 'video' in item or item.type == 'video' %}
|
| 15 |
+
{%- if do_vision_count %}
|
| 16 |
+
{%- set video_count.value = video_count.value + 1 %}
|
| 17 |
+
{%- endif %}
|
| 18 |
+
{%- if add_vision_id %}Video {{ video_count.value }}: {% endif -%}
|
| 19 |
+
<|vision_start|><|video_pad|><|vision_end|>
|
| 20 |
+
{%- elif 'text' in item %}
|
| 21 |
+
{{- item.text }}
|
| 22 |
+
{%- endif %}
|
| 23 |
+
{%- endfor %}
|
| 24 |
+
{%- endif %}
|
| 25 |
+
{%- endmacro %}
|
| 26 |
+
{%- if tools %}
|
| 27 |
+
{{- '<|im_start|>system\n' }}
|
| 28 |
+
{%- if messages[0].role == 'system' %}
|
| 29 |
+
{{- render_content(messages[0].content, false) + '\n\n' }}
|
| 30 |
+
{%- endif %}
|
| 31 |
+
{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
|
| 32 |
+
{%- for tool in tools %}
|
| 33 |
+
{{- "\n" }}
|
| 34 |
+
{{- tool | tojson }}
|
| 35 |
+
{%- endfor %}
|
| 36 |
+
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
|
| 37 |
+
{%- else %}
|
| 38 |
+
{%- if messages[0].role == 'system' %}
|
| 39 |
+
{{- '<|im_start|>system\n' + render_content(messages[0].content, false) + '<|im_end|>\n' }}
|
| 40 |
+
{%- endif %}
|
| 41 |
+
{%- endif %}
|
| 42 |
+
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
|
| 43 |
+
{%- for message in messages[::-1] %}
|
| 44 |
+
{%- set index = (messages|length - 1) - loop.index0 %}
|
| 45 |
+
{%- if ns.multi_step_tool and message.role == "user" %}
|
| 46 |
+
{%- set content = render_content(message.content, false) %}
|
| 47 |
+
{%- if not(content.startswith('<tool_response>') and content.endswith('</tool_response>')) %}
|
| 48 |
+
{%- set ns.multi_step_tool = false %}
|
| 49 |
+
{%- set ns.last_query_index = index %}
|
| 50 |
+
{%- endif %}
|
| 51 |
+
{%- endif %}
|
| 52 |
+
{%- endfor %}
|
| 53 |
+
{%- for message in messages %}
|
| 54 |
+
{%- set content = render_content(message.content, True) %}
|
| 55 |
+
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
|
| 56 |
+
{{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
|
| 57 |
+
{%- elif message.role == "assistant" %}
|
| 58 |
+
{%- set reasoning_content = '' %}
|
| 59 |
+
{%- if message.reasoning_content is string %}
|
| 60 |
+
{%- set reasoning_content = message.reasoning_content %}
|
| 61 |
+
{%- else %}
|
| 62 |
+
{%- if '</think>' in content %}
|
| 63 |
+
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
|
| 64 |
+
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
|
| 65 |
+
{%- endif %}
|
| 66 |
+
{%- endif %}
|
| 67 |
+
{%- if loop.index0 > ns.last_query_index %}
|
| 68 |
+
{%- if loop.last or (not loop.last and reasoning_content) %}
|
| 69 |
+
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
|
| 70 |
+
{%- else %}
|
| 71 |
+
{{- '<|im_start|>' + message.role + '\n' + content }}
|
| 72 |
+
{%- endif %}
|
| 73 |
+
{%- else %}
|
| 74 |
+
{{- '<|im_start|>' + message.role + '\n' + content }}
|
| 75 |
+
{%- endif %}
|
| 76 |
+
{%- if message.tool_calls %}
|
| 77 |
+
{%- for tool_call in message.tool_calls %}
|
| 78 |
+
{%- if (loop.first and content) or (not loop.first) %}
|
| 79 |
+
{{- '\n' }}
|
| 80 |
+
{%- endif %}
|
| 81 |
+
{%- if tool_call.function %}
|
| 82 |
+
{%- set tool_call = tool_call.function %}
|
| 83 |
+
{%- endif %}
|
| 84 |
+
{{- '<tool_call>\n{"name": "' }}
|
| 85 |
+
{{- tool_call.name }}
|
| 86 |
+
{{- '", "arguments": ' }}
|
| 87 |
+
{%- if tool_call.arguments is string %}
|
| 88 |
+
{{- tool_call.arguments }}
|
| 89 |
+
{%- else %}
|
| 90 |
+
{{- tool_call.arguments | tojson }}
|
| 91 |
+
{%- endif %}
|
| 92 |
+
{{- '}\n</tool_call>' }}
|
| 93 |
+
{%- endfor %}
|
| 94 |
+
{%- endif %}
|
| 95 |
+
{{- '<|im_end|>\n' }}
|
| 96 |
+
{%- elif message.role == "tool" %}
|
| 97 |
+
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
|
| 98 |
+
{{- '<|im_start|>user' }}
|
| 99 |
+
{%- endif %}
|
| 100 |
+
{{- '\n<tool_response>\n' }}
|
| 101 |
+
{{- content }}
|
| 102 |
+
{{- '\n</tool_response>' }}
|
| 103 |
+
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
| 104 |
+
{{- '<|im_end|>\n' }}
|
| 105 |
+
{%- endif %}
|
| 106 |
+
{%- endif %}
|
| 107 |
+
{%- endfor %}
|
| 108 |
+
{%- if add_generation_prompt %}
|
| 109 |
+
{{- '<|im_start|>assistant\n' }}
|
| 110 |
+
{%- endif %}
|
checkpoint-200/config.json
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"Qwen3VLForConditionalGeneration"
|
| 4 |
+
],
|
| 5 |
+
"dtype": "bfloat16",
|
| 6 |
+
"eos_token_id": 151645,
|
| 7 |
+
"image_token_id": 151655,
|
| 8 |
+
"model_type": "qwen3_vl",
|
| 9 |
+
"pad_token_id": 151643,
|
| 10 |
+
"text_config": {
|
| 11 |
+
"attention_bias": false,
|
| 12 |
+
"attention_dropout": 0.0,
|
| 13 |
+
"bos_token_id": 151643,
|
| 14 |
+
"dtype": "bfloat16",
|
| 15 |
+
"eos_token_id": 151645,
|
| 16 |
+
"head_dim": 128,
|
| 17 |
+
"hidden_act": "silu",
|
| 18 |
+
"hidden_size": 2560,
|
| 19 |
+
"initializer_range": 0.02,
|
| 20 |
+
"intermediate_size": 9728,
|
| 21 |
+
"max_position_embeddings": 262144,
|
| 22 |
+
"model_type": "qwen3_vl_text",
|
| 23 |
+
"num_attention_heads": 32,
|
| 24 |
+
"num_hidden_layers": 36,
|
| 25 |
+
"num_key_value_heads": 8,
|
| 26 |
+
"rms_norm_eps": 1e-06,
|
| 27 |
+
"rope_scaling": {
|
| 28 |
+
"mrope_interleaved": true,
|
| 29 |
+
"mrope_section": [
|
| 30 |
+
24,
|
| 31 |
+
20,
|
| 32 |
+
20
|
| 33 |
+
],
|
| 34 |
+
"rope_type": "default"
|
| 35 |
+
},
|
| 36 |
+
"rope_theta": 5000000,
|
| 37 |
+
"tie_word_embeddings": true,
|
| 38 |
+
"use_cache": true,
|
| 39 |
+
"vocab_size": 151936
|
| 40 |
+
},
|
| 41 |
+
"tie_word_embeddings": true,
|
| 42 |
+
"transformers_version": "4.57.6",
|
| 43 |
+
"use_cache": false,
|
| 44 |
+
"video_token_id": 151656,
|
| 45 |
+
"vision_config": {
|
| 46 |
+
"deepstack_visual_indexes": [
|
| 47 |
+
5,
|
| 48 |
+
11,
|
| 49 |
+
17
|
| 50 |
+
],
|
| 51 |
+
"depth": 24,
|
| 52 |
+
"dtype": "bfloat16",
|
| 53 |
+
"hidden_act": "gelu_pytorch_tanh",
|
| 54 |
+
"hidden_size": 1024,
|
| 55 |
+
"in_channels": 3,
|
| 56 |
+
"initializer_range": 0.02,
|
| 57 |
+
"intermediate_size": 4096,
|
| 58 |
+
"model_type": "qwen3_vl",
|
| 59 |
+
"num_heads": 16,
|
| 60 |
+
"num_position_embeddings": 2304,
|
| 61 |
+
"out_hidden_size": 2560,
|
| 62 |
+
"patch_size": 16,
|
| 63 |
+
"spatial_merge_size": 2,
|
| 64 |
+
"temporal_patch_size": 2
|
| 65 |
+
},
|
| 66 |
+
"vision_end_token_id": 151653,
|
| 67 |
+
"vision_start_token_id": 151652
|
| 68 |
+
}
|
checkpoint-200/generation_config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"do_sample": true,
|
| 3 |
+
"eos_token_id": [
|
| 4 |
+
151645,
|
| 5 |
+
151645,
|
| 6 |
+
151643
|
| 7 |
+
],
|
| 8 |
+
"pad_token_id": 151643,
|
| 9 |
+
"top_k": 20,
|
| 10 |
+
"top_p": 0.95,
|
| 11 |
+
"transformers_version": "4.57.6"
|
| 12 |
+
}
|
checkpoint-200/merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoint-200/model-00001-of-00002.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:82a6c7ae32ce39bbd0f0176992dd578f0049d2e773fd6cef8940780ab346bb75
|
| 3 |
+
size 4990497880
|
checkpoint-200/model-00002-of-00002.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0f26a960badde0554a48bbbeb7498661684ba900b033f6ad33174ca84321acd2
|
| 3 |
+
size 3885221448
|
checkpoint-200/model.safetensors.index.json
ADDED
|
@@ -0,0 +1,721 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"metadata": {
|
| 3 |
+
"total_parameters": 4437815808,
|
| 4 |
+
"total_size": 8875631616
|
| 5 |
+
},
|
| 6 |
+
"weight_map": {
|
| 7 |
+
"model.language_model.embed_tokens.weight": "model-00001-of-00002.safetensors",
|
| 8 |
+
"model.language_model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 9 |
+
"model.language_model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 10 |
+
"model.language_model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 11 |
+
"model.language_model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 12 |
+
"model.language_model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 13 |
+
"model.language_model.layers.0.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 14 |
+
"model.language_model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 15 |
+
"model.language_model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 16 |
+
"model.language_model.layers.0.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 17 |
+
"model.language_model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 18 |
+
"model.language_model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 19 |
+
"model.language_model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 20 |
+
"model.language_model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 21 |
+
"model.language_model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 22 |
+
"model.language_model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 23 |
+
"model.language_model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 24 |
+
"model.language_model.layers.1.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 25 |
+
"model.language_model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 26 |
+
"model.language_model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 27 |
+
"model.language_model.layers.1.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 28 |
+
"model.language_model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 29 |
+
"model.language_model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 30 |
+
"model.language_model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 31 |
+
"model.language_model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 32 |
+
"model.language_model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 33 |
+
"model.language_model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 34 |
+
"model.language_model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 35 |
+
"model.language_model.layers.10.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 36 |
+
"model.language_model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 37 |
+
"model.language_model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 38 |
+
"model.language_model.layers.10.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 39 |
+
"model.language_model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 40 |
+
"model.language_model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 41 |
+
"model.language_model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 42 |
+
"model.language_model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 43 |
+
"model.language_model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 44 |
+
"model.language_model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 45 |
+
"model.language_model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 46 |
+
"model.language_model.layers.11.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 47 |
+
"model.language_model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 48 |
+
"model.language_model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 49 |
+
"model.language_model.layers.11.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 50 |
+
"model.language_model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 51 |
+
"model.language_model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 52 |
+
"model.language_model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 53 |
+
"model.language_model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 54 |
+
"model.language_model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 55 |
+
"model.language_model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 56 |
+
"model.language_model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 57 |
+
"model.language_model.layers.12.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 58 |
+
"model.language_model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 59 |
+
"model.language_model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 60 |
+
"model.language_model.layers.12.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 61 |
+
"model.language_model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 62 |
+
"model.language_model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 63 |
+
"model.language_model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 64 |
+
"model.language_model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 65 |
+
"model.language_model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 66 |
+
"model.language_model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 67 |
+
"model.language_model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 68 |
+
"model.language_model.layers.13.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 69 |
+
"model.language_model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 70 |
+
"model.language_model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 71 |
+
"model.language_model.layers.13.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 72 |
+
"model.language_model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 73 |
+
"model.language_model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 74 |
+
"model.language_model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 75 |
+
"model.language_model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 76 |
+
"model.language_model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 77 |
+
"model.language_model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 78 |
+
"model.language_model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 79 |
+
"model.language_model.layers.14.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 80 |
+
"model.language_model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 81 |
+
"model.language_model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 82 |
+
"model.language_model.layers.14.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 83 |
+
"model.language_model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 84 |
+
"model.language_model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 85 |
+
"model.language_model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 86 |
+
"model.language_model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 87 |
+
"model.language_model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 88 |
+
"model.language_model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 89 |
+
"model.language_model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 90 |
+
"model.language_model.layers.15.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 91 |
+
"model.language_model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 92 |
+
"model.language_model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 93 |
+
"model.language_model.layers.15.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 94 |
+
"model.language_model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 95 |
+
"model.language_model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 96 |
+
"model.language_model.layers.16.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 97 |
+
"model.language_model.layers.16.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 98 |
+
"model.language_model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 99 |
+
"model.language_model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 100 |
+
"model.language_model.layers.16.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 101 |
+
"model.language_model.layers.16.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 102 |
+
"model.language_model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 103 |
+
"model.language_model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 104 |
+
"model.language_model.layers.16.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 105 |
+
"model.language_model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 106 |
+
"model.language_model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 107 |
+
"model.language_model.layers.17.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 108 |
+
"model.language_model.layers.17.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 109 |
+
"model.language_model.layers.17.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 110 |
+
"model.language_model.layers.17.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 111 |
+
"model.language_model.layers.17.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 112 |
+
"model.language_model.layers.17.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 113 |
+
"model.language_model.layers.17.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 114 |
+
"model.language_model.layers.17.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 115 |
+
"model.language_model.layers.17.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 116 |
+
"model.language_model.layers.17.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 117 |
+
"model.language_model.layers.17.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 118 |
+
"model.language_model.layers.18.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 119 |
+
"model.language_model.layers.18.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 120 |
+
"model.language_model.layers.18.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 121 |
+
"model.language_model.layers.18.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 122 |
+
"model.language_model.layers.18.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 123 |
+
"model.language_model.layers.18.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 124 |
+
"model.language_model.layers.18.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 125 |
+
"model.language_model.layers.18.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 126 |
+
"model.language_model.layers.18.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 127 |
+
"model.language_model.layers.18.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 128 |
+
"model.language_model.layers.18.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 129 |
+
"model.language_model.layers.19.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 130 |
+
"model.language_model.layers.19.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 131 |
+
"model.language_model.layers.19.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 132 |
+
"model.language_model.layers.19.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 133 |
+
"model.language_model.layers.19.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 134 |
+
"model.language_model.layers.19.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 135 |
+
"model.language_model.layers.19.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 136 |
+
"model.language_model.layers.19.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 137 |
+
"model.language_model.layers.19.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 138 |
+
"model.language_model.layers.19.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 139 |
+
"model.language_model.layers.19.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 140 |
+
"model.language_model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 141 |
+
"model.language_model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 142 |
+
"model.language_model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 143 |
+
"model.language_model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 144 |
+
"model.language_model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 145 |
+
"model.language_model.layers.2.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 146 |
+
"model.language_model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 147 |
+
"model.language_model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 148 |
+
"model.language_model.layers.2.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 149 |
+
"model.language_model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 150 |
+
"model.language_model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 151 |
+
"model.language_model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 152 |
+
"model.language_model.layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 153 |
+
"model.language_model.layers.20.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 154 |
+
"model.language_model.layers.20.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 155 |
+
"model.language_model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 156 |
+
"model.language_model.layers.20.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 157 |
+
"model.language_model.layers.20.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 158 |
+
"model.language_model.layers.20.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 159 |
+
"model.language_model.layers.20.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 160 |
+
"model.language_model.layers.20.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 161 |
+
"model.language_model.layers.20.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 162 |
+
"model.language_model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 163 |
+
"model.language_model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 164 |
+
"model.language_model.layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 165 |
+
"model.language_model.layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 166 |
+
"model.language_model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 167 |
+
"model.language_model.layers.21.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 168 |
+
"model.language_model.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 169 |
+
"model.language_model.layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 170 |
+
"model.language_model.layers.21.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 171 |
+
"model.language_model.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 172 |
+
"model.language_model.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 173 |
+
"model.language_model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 174 |
+
"model.language_model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 175 |
+
"model.language_model.layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 176 |
+
"model.language_model.layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 177 |
+
"model.language_model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 178 |
+
"model.language_model.layers.22.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 179 |
+
"model.language_model.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 180 |
+
"model.language_model.layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 181 |
+
"model.language_model.layers.22.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 182 |
+
"model.language_model.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 183 |
+
"model.language_model.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 184 |
+
"model.language_model.layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 185 |
+
"model.language_model.layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 186 |
+
"model.language_model.layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 187 |
+
"model.language_model.layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 188 |
+
"model.language_model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 189 |
+
"model.language_model.layers.23.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 190 |
+
"model.language_model.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 191 |
+
"model.language_model.layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 192 |
+
"model.language_model.layers.23.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 193 |
+
"model.language_model.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 194 |
+
"model.language_model.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 195 |
+
"model.language_model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 196 |
+
"model.language_model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 197 |
+
"model.language_model.layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 198 |
+
"model.language_model.layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 199 |
+
"model.language_model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 200 |
+
"model.language_model.layers.24.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 201 |
+
"model.language_model.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 202 |
+
"model.language_model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 203 |
+
"model.language_model.layers.24.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 204 |
+
"model.language_model.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 205 |
+
"model.language_model.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 206 |
+
"model.language_model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 207 |
+
"model.language_model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 208 |
+
"model.language_model.layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 209 |
+
"model.language_model.layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 210 |
+
"model.language_model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 211 |
+
"model.language_model.layers.25.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 212 |
+
"model.language_model.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 213 |
+
"model.language_model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 214 |
+
"model.language_model.layers.25.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 215 |
+
"model.language_model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 216 |
+
"model.language_model.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 217 |
+
"model.language_model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 218 |
+
"model.language_model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 219 |
+
"model.language_model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 220 |
+
"model.language_model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 221 |
+
"model.language_model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 222 |
+
"model.language_model.layers.26.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 223 |
+
"model.language_model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 224 |
+
"model.language_model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 225 |
+
"model.language_model.layers.26.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 226 |
+
"model.language_model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 227 |
+
"model.language_model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 228 |
+
"model.language_model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 229 |
+
"model.language_model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 230 |
+
"model.language_model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 231 |
+
"model.language_model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 232 |
+
"model.language_model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 233 |
+
"model.language_model.layers.27.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 234 |
+
"model.language_model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 235 |
+
"model.language_model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 236 |
+
"model.language_model.layers.27.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 237 |
+
"model.language_model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 238 |
+
"model.language_model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 239 |
+
"model.language_model.layers.28.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 240 |
+
"model.language_model.layers.28.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 241 |
+
"model.language_model.layers.28.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 242 |
+
"model.language_model.layers.28.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 243 |
+
"model.language_model.layers.28.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 244 |
+
"model.language_model.layers.28.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 245 |
+
"model.language_model.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 246 |
+
"model.language_model.layers.28.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 247 |
+
"model.language_model.layers.28.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 248 |
+
"model.language_model.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 249 |
+
"model.language_model.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 250 |
+
"model.language_model.layers.29.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 251 |
+
"model.language_model.layers.29.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 252 |
+
"model.language_model.layers.29.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 253 |
+
"model.language_model.layers.29.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 254 |
+
"model.language_model.layers.29.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 255 |
+
"model.language_model.layers.29.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 256 |
+
"model.language_model.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 257 |
+
"model.language_model.layers.29.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 258 |
+
"model.language_model.layers.29.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 259 |
+
"model.language_model.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 260 |
+
"model.language_model.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 261 |
+
"model.language_model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 262 |
+
"model.language_model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 263 |
+
"model.language_model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 264 |
+
"model.language_model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 265 |
+
"model.language_model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 266 |
+
"model.language_model.layers.3.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 267 |
+
"model.language_model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 268 |
+
"model.language_model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 269 |
+
"model.language_model.layers.3.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 270 |
+
"model.language_model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 271 |
+
"model.language_model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 272 |
+
"model.language_model.layers.30.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 273 |
+
"model.language_model.layers.30.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 274 |
+
"model.language_model.layers.30.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 275 |
+
"model.language_model.layers.30.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 276 |
+
"model.language_model.layers.30.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 277 |
+
"model.language_model.layers.30.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 278 |
+
"model.language_model.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 279 |
+
"model.language_model.layers.30.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 280 |
+
"model.language_model.layers.30.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 281 |
+
"model.language_model.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 282 |
+
"model.language_model.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 283 |
+
"model.language_model.layers.31.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 284 |
+
"model.language_model.layers.31.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 285 |
+
"model.language_model.layers.31.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 286 |
+
"model.language_model.layers.31.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 287 |
+
"model.language_model.layers.31.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 288 |
+
"model.language_model.layers.31.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 289 |
+
"model.language_model.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 290 |
+
"model.language_model.layers.31.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 291 |
+
"model.language_model.layers.31.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 292 |
+
"model.language_model.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 293 |
+
"model.language_model.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 294 |
+
"model.language_model.layers.32.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 295 |
+
"model.language_model.layers.32.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 296 |
+
"model.language_model.layers.32.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 297 |
+
"model.language_model.layers.32.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 298 |
+
"model.language_model.layers.32.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 299 |
+
"model.language_model.layers.32.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 300 |
+
"model.language_model.layers.32.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 301 |
+
"model.language_model.layers.32.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 302 |
+
"model.language_model.layers.32.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 303 |
+
"model.language_model.layers.32.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 304 |
+
"model.language_model.layers.32.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 305 |
+
"model.language_model.layers.33.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 306 |
+
"model.language_model.layers.33.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 307 |
+
"model.language_model.layers.33.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 308 |
+
"model.language_model.layers.33.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 309 |
+
"model.language_model.layers.33.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 310 |
+
"model.language_model.layers.33.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 311 |
+
"model.language_model.layers.33.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 312 |
+
"model.language_model.layers.33.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 313 |
+
"model.language_model.layers.33.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 314 |
+
"model.language_model.layers.33.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 315 |
+
"model.language_model.layers.33.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 316 |
+
"model.language_model.layers.34.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 317 |
+
"model.language_model.layers.34.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 318 |
+
"model.language_model.layers.34.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 319 |
+
"model.language_model.layers.34.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 320 |
+
"model.language_model.layers.34.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 321 |
+
"model.language_model.layers.34.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 322 |
+
"model.language_model.layers.34.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 323 |
+
"model.language_model.layers.34.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 324 |
+
"model.language_model.layers.34.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 325 |
+
"model.language_model.layers.34.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 326 |
+
"model.language_model.layers.34.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 327 |
+
"model.language_model.layers.35.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 328 |
+
"model.language_model.layers.35.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 329 |
+
"model.language_model.layers.35.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 330 |
+
"model.language_model.layers.35.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 331 |
+
"model.language_model.layers.35.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 332 |
+
"model.language_model.layers.35.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 333 |
+
"model.language_model.layers.35.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 334 |
+
"model.language_model.layers.35.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 335 |
+
"model.language_model.layers.35.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 336 |
+
"model.language_model.layers.35.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 337 |
+
"model.language_model.layers.35.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 338 |
+
"model.language_model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 339 |
+
"model.language_model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 340 |
+
"model.language_model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 341 |
+
"model.language_model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 342 |
+
"model.language_model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 343 |
+
"model.language_model.layers.4.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 344 |
+
"model.language_model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 345 |
+
"model.language_model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 346 |
+
"model.language_model.layers.4.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 347 |
+
"model.language_model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 348 |
+
"model.language_model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 349 |
+
"model.language_model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 350 |
+
"model.language_model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 351 |
+
"model.language_model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 352 |
+
"model.language_model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 353 |
+
"model.language_model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 354 |
+
"model.language_model.layers.5.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 355 |
+
"model.language_model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 356 |
+
"model.language_model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 357 |
+
"model.language_model.layers.5.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 358 |
+
"model.language_model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 359 |
+
"model.language_model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 360 |
+
"model.language_model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 361 |
+
"model.language_model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 362 |
+
"model.language_model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 363 |
+
"model.language_model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 364 |
+
"model.language_model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 365 |
+
"model.language_model.layers.6.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 366 |
+
"model.language_model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 367 |
+
"model.language_model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 368 |
+
"model.language_model.layers.6.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 369 |
+
"model.language_model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 370 |
+
"model.language_model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 371 |
+
"model.language_model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 372 |
+
"model.language_model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 373 |
+
"model.language_model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 374 |
+
"model.language_model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 375 |
+
"model.language_model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 376 |
+
"model.language_model.layers.7.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 377 |
+
"model.language_model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 378 |
+
"model.language_model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 379 |
+
"model.language_model.layers.7.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 380 |
+
"model.language_model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 381 |
+
"model.language_model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 382 |
+
"model.language_model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 383 |
+
"model.language_model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 384 |
+
"model.language_model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 385 |
+
"model.language_model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 386 |
+
"model.language_model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 387 |
+
"model.language_model.layers.8.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 388 |
+
"model.language_model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 389 |
+
"model.language_model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 390 |
+
"model.language_model.layers.8.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 391 |
+
"model.language_model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 392 |
+
"model.language_model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 393 |
+
"model.language_model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 394 |
+
"model.language_model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 395 |
+
"model.language_model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 396 |
+
"model.language_model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 397 |
+
"model.language_model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 398 |
+
"model.language_model.layers.9.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 399 |
+
"model.language_model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 400 |
+
"model.language_model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 401 |
+
"model.language_model.layers.9.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 402 |
+
"model.language_model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 403 |
+
"model.language_model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 404 |
+
"model.language_model.norm.weight": "model-00002-of-00002.safetensors",
|
| 405 |
+
"model.visual.blocks.0.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 406 |
+
"model.visual.blocks.0.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 407 |
+
"model.visual.blocks.0.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 408 |
+
"model.visual.blocks.0.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 409 |
+
"model.visual.blocks.0.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 410 |
+
"model.visual.blocks.0.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 411 |
+
"model.visual.blocks.0.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 412 |
+
"model.visual.blocks.0.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 413 |
+
"model.visual.blocks.0.norm1.bias": "model-00001-of-00002.safetensors",
|
| 414 |
+
"model.visual.blocks.0.norm1.weight": "model-00001-of-00002.safetensors",
|
| 415 |
+
"model.visual.blocks.0.norm2.bias": "model-00001-of-00002.safetensors",
|
| 416 |
+
"model.visual.blocks.0.norm2.weight": "model-00001-of-00002.safetensors",
|
| 417 |
+
"model.visual.blocks.1.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 418 |
+
"model.visual.blocks.1.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 419 |
+
"model.visual.blocks.1.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 420 |
+
"model.visual.blocks.1.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 421 |
+
"model.visual.blocks.1.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 422 |
+
"model.visual.blocks.1.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 423 |
+
"model.visual.blocks.1.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 424 |
+
"model.visual.blocks.1.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 425 |
+
"model.visual.blocks.1.norm1.bias": "model-00001-of-00002.safetensors",
|
| 426 |
+
"model.visual.blocks.1.norm1.weight": "model-00001-of-00002.safetensors",
|
| 427 |
+
"model.visual.blocks.1.norm2.bias": "model-00001-of-00002.safetensors",
|
| 428 |
+
"model.visual.blocks.1.norm2.weight": "model-00001-of-00002.safetensors",
|
| 429 |
+
"model.visual.blocks.10.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 430 |
+
"model.visual.blocks.10.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 431 |
+
"model.visual.blocks.10.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 432 |
+
"model.visual.blocks.10.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 433 |
+
"model.visual.blocks.10.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 434 |
+
"model.visual.blocks.10.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 435 |
+
"model.visual.blocks.10.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 436 |
+
"model.visual.blocks.10.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 437 |
+
"model.visual.blocks.10.norm1.bias": "model-00001-of-00002.safetensors",
|
| 438 |
+
"model.visual.blocks.10.norm1.weight": "model-00001-of-00002.safetensors",
|
| 439 |
+
"model.visual.blocks.10.norm2.bias": "model-00001-of-00002.safetensors",
|
| 440 |
+
"model.visual.blocks.10.norm2.weight": "model-00001-of-00002.safetensors",
|
| 441 |
+
"model.visual.blocks.11.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 442 |
+
"model.visual.blocks.11.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 443 |
+
"model.visual.blocks.11.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 444 |
+
"model.visual.blocks.11.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 445 |
+
"model.visual.blocks.11.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 446 |
+
"model.visual.blocks.11.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 447 |
+
"model.visual.blocks.11.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 448 |
+
"model.visual.blocks.11.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 449 |
+
"model.visual.blocks.11.norm1.bias": "model-00001-of-00002.safetensors",
|
| 450 |
+
"model.visual.blocks.11.norm1.weight": "model-00001-of-00002.safetensors",
|
| 451 |
+
"model.visual.blocks.11.norm2.bias": "model-00001-of-00002.safetensors",
|
| 452 |
+
"model.visual.blocks.11.norm2.weight": "model-00001-of-00002.safetensors",
|
| 453 |
+
"model.visual.blocks.12.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 454 |
+
"model.visual.blocks.12.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 455 |
+
"model.visual.blocks.12.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 456 |
+
"model.visual.blocks.12.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 457 |
+
"model.visual.blocks.12.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 458 |
+
"model.visual.blocks.12.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 459 |
+
"model.visual.blocks.12.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 460 |
+
"model.visual.blocks.12.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 461 |
+
"model.visual.blocks.12.norm1.bias": "model-00001-of-00002.safetensors",
|
| 462 |
+
"model.visual.blocks.12.norm1.weight": "model-00001-of-00002.safetensors",
|
| 463 |
+
"model.visual.blocks.12.norm2.bias": "model-00001-of-00002.safetensors",
|
| 464 |
+
"model.visual.blocks.12.norm2.weight": "model-00001-of-00002.safetensors",
|
| 465 |
+
"model.visual.blocks.13.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 466 |
+
"model.visual.blocks.13.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 467 |
+
"model.visual.blocks.13.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 468 |
+
"model.visual.blocks.13.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 469 |
+
"model.visual.blocks.13.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 470 |
+
"model.visual.blocks.13.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 471 |
+
"model.visual.blocks.13.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 472 |
+
"model.visual.blocks.13.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 473 |
+
"model.visual.blocks.13.norm1.bias": "model-00001-of-00002.safetensors",
|
| 474 |
+
"model.visual.blocks.13.norm1.weight": "model-00001-of-00002.safetensors",
|
| 475 |
+
"model.visual.blocks.13.norm2.bias": "model-00001-of-00002.safetensors",
|
| 476 |
+
"model.visual.blocks.13.norm2.weight": "model-00001-of-00002.safetensors",
|
| 477 |
+
"model.visual.blocks.14.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 478 |
+
"model.visual.blocks.14.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 479 |
+
"model.visual.blocks.14.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 480 |
+
"model.visual.blocks.14.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 481 |
+
"model.visual.blocks.14.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 482 |
+
"model.visual.blocks.14.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 483 |
+
"model.visual.blocks.14.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 484 |
+
"model.visual.blocks.14.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 485 |
+
"model.visual.blocks.14.norm1.bias": "model-00001-of-00002.safetensors",
|
| 486 |
+
"model.visual.blocks.14.norm1.weight": "model-00001-of-00002.safetensors",
|
| 487 |
+
"model.visual.blocks.14.norm2.bias": "model-00001-of-00002.safetensors",
|
| 488 |
+
"model.visual.blocks.14.norm2.weight": "model-00001-of-00002.safetensors",
|
| 489 |
+
"model.visual.blocks.15.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 490 |
+
"model.visual.blocks.15.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 491 |
+
"model.visual.blocks.15.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 492 |
+
"model.visual.blocks.15.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 493 |
+
"model.visual.blocks.15.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 494 |
+
"model.visual.blocks.15.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 495 |
+
"model.visual.blocks.15.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 496 |
+
"model.visual.blocks.15.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 497 |
+
"model.visual.blocks.15.norm1.bias": "model-00001-of-00002.safetensors",
|
| 498 |
+
"model.visual.blocks.15.norm1.weight": "model-00001-of-00002.safetensors",
|
| 499 |
+
"model.visual.blocks.15.norm2.bias": "model-00001-of-00002.safetensors",
|
| 500 |
+
"model.visual.blocks.15.norm2.weight": "model-00001-of-00002.safetensors",
|
| 501 |
+
"model.visual.blocks.16.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 502 |
+
"model.visual.blocks.16.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 503 |
+
"model.visual.blocks.16.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 504 |
+
"model.visual.blocks.16.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 505 |
+
"model.visual.blocks.16.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 506 |
+
"model.visual.blocks.16.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 507 |
+
"model.visual.blocks.16.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 508 |
+
"model.visual.blocks.16.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 509 |
+
"model.visual.blocks.16.norm1.bias": "model-00001-of-00002.safetensors",
|
| 510 |
+
"model.visual.blocks.16.norm1.weight": "model-00001-of-00002.safetensors",
|
| 511 |
+
"model.visual.blocks.16.norm2.bias": "model-00001-of-00002.safetensors",
|
| 512 |
+
"model.visual.blocks.16.norm2.weight": "model-00001-of-00002.safetensors",
|
| 513 |
+
"model.visual.blocks.17.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 514 |
+
"model.visual.blocks.17.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 515 |
+
"model.visual.blocks.17.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 516 |
+
"model.visual.blocks.17.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 517 |
+
"model.visual.blocks.17.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 518 |
+
"model.visual.blocks.17.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 519 |
+
"model.visual.blocks.17.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 520 |
+
"model.visual.blocks.17.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 521 |
+
"model.visual.blocks.17.norm1.bias": "model-00001-of-00002.safetensors",
|
| 522 |
+
"model.visual.blocks.17.norm1.weight": "model-00001-of-00002.safetensors",
|
| 523 |
+
"model.visual.blocks.17.norm2.bias": "model-00001-of-00002.safetensors",
|
| 524 |
+
"model.visual.blocks.17.norm2.weight": "model-00001-of-00002.safetensors",
|
| 525 |
+
"model.visual.blocks.18.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 526 |
+
"model.visual.blocks.18.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 527 |
+
"model.visual.blocks.18.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 528 |
+
"model.visual.blocks.18.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 529 |
+
"model.visual.blocks.18.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 530 |
+
"model.visual.blocks.18.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 531 |
+
"model.visual.blocks.18.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 532 |
+
"model.visual.blocks.18.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 533 |
+
"model.visual.blocks.18.norm1.bias": "model-00001-of-00002.safetensors",
|
| 534 |
+
"model.visual.blocks.18.norm1.weight": "model-00001-of-00002.safetensors",
|
| 535 |
+
"model.visual.blocks.18.norm2.bias": "model-00001-of-00002.safetensors",
|
| 536 |
+
"model.visual.blocks.18.norm2.weight": "model-00001-of-00002.safetensors",
|
| 537 |
+
"model.visual.blocks.19.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 538 |
+
"model.visual.blocks.19.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 539 |
+
"model.visual.blocks.19.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 540 |
+
"model.visual.blocks.19.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 541 |
+
"model.visual.blocks.19.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 542 |
+
"model.visual.blocks.19.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 543 |
+
"model.visual.blocks.19.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 544 |
+
"model.visual.blocks.19.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 545 |
+
"model.visual.blocks.19.norm1.bias": "model-00001-of-00002.safetensors",
|
| 546 |
+
"model.visual.blocks.19.norm1.weight": "model-00001-of-00002.safetensors",
|
| 547 |
+
"model.visual.blocks.19.norm2.bias": "model-00001-of-00002.safetensors",
|
| 548 |
+
"model.visual.blocks.19.norm2.weight": "model-00001-of-00002.safetensors",
|
| 549 |
+
"model.visual.blocks.2.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 550 |
+
"model.visual.blocks.2.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 551 |
+
"model.visual.blocks.2.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 552 |
+
"model.visual.blocks.2.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 553 |
+
"model.visual.blocks.2.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 554 |
+
"model.visual.blocks.2.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 555 |
+
"model.visual.blocks.2.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 556 |
+
"model.visual.blocks.2.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 557 |
+
"model.visual.blocks.2.norm1.bias": "model-00001-of-00002.safetensors",
|
| 558 |
+
"model.visual.blocks.2.norm1.weight": "model-00001-of-00002.safetensors",
|
| 559 |
+
"model.visual.blocks.2.norm2.bias": "model-00001-of-00002.safetensors",
|
| 560 |
+
"model.visual.blocks.2.norm2.weight": "model-00001-of-00002.safetensors",
|
| 561 |
+
"model.visual.blocks.20.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 562 |
+
"model.visual.blocks.20.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 563 |
+
"model.visual.blocks.20.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 564 |
+
"model.visual.blocks.20.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 565 |
+
"model.visual.blocks.20.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 566 |
+
"model.visual.blocks.20.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 567 |
+
"model.visual.blocks.20.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 568 |
+
"model.visual.blocks.20.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 569 |
+
"model.visual.blocks.20.norm1.bias": "model-00001-of-00002.safetensors",
|
| 570 |
+
"model.visual.blocks.20.norm1.weight": "model-00001-of-00002.safetensors",
|
| 571 |
+
"model.visual.blocks.20.norm2.bias": "model-00001-of-00002.safetensors",
|
| 572 |
+
"model.visual.blocks.20.norm2.weight": "model-00001-of-00002.safetensors",
|
| 573 |
+
"model.visual.blocks.21.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 574 |
+
"model.visual.blocks.21.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 575 |
+
"model.visual.blocks.21.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 576 |
+
"model.visual.blocks.21.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 577 |
+
"model.visual.blocks.21.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 578 |
+
"model.visual.blocks.21.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 579 |
+
"model.visual.blocks.21.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 580 |
+
"model.visual.blocks.21.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 581 |
+
"model.visual.blocks.21.norm1.bias": "model-00001-of-00002.safetensors",
|
| 582 |
+
"model.visual.blocks.21.norm1.weight": "model-00001-of-00002.safetensors",
|
| 583 |
+
"model.visual.blocks.21.norm2.bias": "model-00001-of-00002.safetensors",
|
| 584 |
+
"model.visual.blocks.21.norm2.weight": "model-00001-of-00002.safetensors",
|
| 585 |
+
"model.visual.blocks.22.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 586 |
+
"model.visual.blocks.22.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 587 |
+
"model.visual.blocks.22.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 588 |
+
"model.visual.blocks.22.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 589 |
+
"model.visual.blocks.22.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 590 |
+
"model.visual.blocks.22.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 591 |
+
"model.visual.blocks.22.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 592 |
+
"model.visual.blocks.22.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 593 |
+
"model.visual.blocks.22.norm1.bias": "model-00001-of-00002.safetensors",
|
| 594 |
+
"model.visual.blocks.22.norm1.weight": "model-00001-of-00002.safetensors",
|
| 595 |
+
"model.visual.blocks.22.norm2.bias": "model-00001-of-00002.safetensors",
|
| 596 |
+
"model.visual.blocks.22.norm2.weight": "model-00001-of-00002.safetensors",
|
| 597 |
+
"model.visual.blocks.23.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 598 |
+
"model.visual.blocks.23.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 599 |
+
"model.visual.blocks.23.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 600 |
+
"model.visual.blocks.23.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 601 |
+
"model.visual.blocks.23.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 602 |
+
"model.visual.blocks.23.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 603 |
+
"model.visual.blocks.23.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 604 |
+
"model.visual.blocks.23.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 605 |
+
"model.visual.blocks.23.norm1.bias": "model-00001-of-00002.safetensors",
|
| 606 |
+
"model.visual.blocks.23.norm1.weight": "model-00001-of-00002.safetensors",
|
| 607 |
+
"model.visual.blocks.23.norm2.bias": "model-00001-of-00002.safetensors",
|
| 608 |
+
"model.visual.blocks.23.norm2.weight": "model-00001-of-00002.safetensors",
|
| 609 |
+
"model.visual.blocks.3.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 610 |
+
"model.visual.blocks.3.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 611 |
+
"model.visual.blocks.3.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 612 |
+
"model.visual.blocks.3.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 613 |
+
"model.visual.blocks.3.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 614 |
+
"model.visual.blocks.3.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 615 |
+
"model.visual.blocks.3.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 616 |
+
"model.visual.blocks.3.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 617 |
+
"model.visual.blocks.3.norm1.bias": "model-00001-of-00002.safetensors",
|
| 618 |
+
"model.visual.blocks.3.norm1.weight": "model-00001-of-00002.safetensors",
|
| 619 |
+
"model.visual.blocks.3.norm2.bias": "model-00001-of-00002.safetensors",
|
| 620 |
+
"model.visual.blocks.3.norm2.weight": "model-00001-of-00002.safetensors",
|
| 621 |
+
"model.visual.blocks.4.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 622 |
+
"model.visual.blocks.4.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 623 |
+
"model.visual.blocks.4.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 624 |
+
"model.visual.blocks.4.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 625 |
+
"model.visual.blocks.4.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 626 |
+
"model.visual.blocks.4.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 627 |
+
"model.visual.blocks.4.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 628 |
+
"model.visual.blocks.4.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 629 |
+
"model.visual.blocks.4.norm1.bias": "model-00001-of-00002.safetensors",
|
| 630 |
+
"model.visual.blocks.4.norm1.weight": "model-00001-of-00002.safetensors",
|
| 631 |
+
"model.visual.blocks.4.norm2.bias": "model-00001-of-00002.safetensors",
|
| 632 |
+
"model.visual.blocks.4.norm2.weight": "model-00001-of-00002.safetensors",
|
| 633 |
+
"model.visual.blocks.5.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 634 |
+
"model.visual.blocks.5.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 635 |
+
"model.visual.blocks.5.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 636 |
+
"model.visual.blocks.5.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 637 |
+
"model.visual.blocks.5.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 638 |
+
"model.visual.blocks.5.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 639 |
+
"model.visual.blocks.5.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 640 |
+
"model.visual.blocks.5.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 641 |
+
"model.visual.blocks.5.norm1.bias": "model-00001-of-00002.safetensors",
|
| 642 |
+
"model.visual.blocks.5.norm1.weight": "model-00001-of-00002.safetensors",
|
| 643 |
+
"model.visual.blocks.5.norm2.bias": "model-00001-of-00002.safetensors",
|
| 644 |
+
"model.visual.blocks.5.norm2.weight": "model-00001-of-00002.safetensors",
|
| 645 |
+
"model.visual.blocks.6.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 646 |
+
"model.visual.blocks.6.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 647 |
+
"model.visual.blocks.6.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 648 |
+
"model.visual.blocks.6.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 649 |
+
"model.visual.blocks.6.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 650 |
+
"model.visual.blocks.6.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 651 |
+
"model.visual.blocks.6.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 652 |
+
"model.visual.blocks.6.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 653 |
+
"model.visual.blocks.6.norm1.bias": "model-00001-of-00002.safetensors",
|
| 654 |
+
"model.visual.blocks.6.norm1.weight": "model-00001-of-00002.safetensors",
|
| 655 |
+
"model.visual.blocks.6.norm2.bias": "model-00001-of-00002.safetensors",
|
| 656 |
+
"model.visual.blocks.6.norm2.weight": "model-00001-of-00002.safetensors",
|
| 657 |
+
"model.visual.blocks.7.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 658 |
+
"model.visual.blocks.7.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 659 |
+
"model.visual.blocks.7.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 660 |
+
"model.visual.blocks.7.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 661 |
+
"model.visual.blocks.7.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 662 |
+
"model.visual.blocks.7.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 663 |
+
"model.visual.blocks.7.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 664 |
+
"model.visual.blocks.7.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 665 |
+
"model.visual.blocks.7.norm1.bias": "model-00001-of-00002.safetensors",
|
| 666 |
+
"model.visual.blocks.7.norm1.weight": "model-00001-of-00002.safetensors",
|
| 667 |
+
"model.visual.blocks.7.norm2.bias": "model-00001-of-00002.safetensors",
|
| 668 |
+
"model.visual.blocks.7.norm2.weight": "model-00001-of-00002.safetensors",
|
| 669 |
+
"model.visual.blocks.8.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 670 |
+
"model.visual.blocks.8.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 671 |
+
"model.visual.blocks.8.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 672 |
+
"model.visual.blocks.8.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 673 |
+
"model.visual.blocks.8.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 674 |
+
"model.visual.blocks.8.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 675 |
+
"model.visual.blocks.8.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 676 |
+
"model.visual.blocks.8.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 677 |
+
"model.visual.blocks.8.norm1.bias": "model-00001-of-00002.safetensors",
|
| 678 |
+
"model.visual.blocks.8.norm1.weight": "model-00001-of-00002.safetensors",
|
| 679 |
+
"model.visual.blocks.8.norm2.bias": "model-00001-of-00002.safetensors",
|
| 680 |
+
"model.visual.blocks.8.norm2.weight": "model-00001-of-00002.safetensors",
|
| 681 |
+
"model.visual.blocks.9.attn.proj.bias": "model-00001-of-00002.safetensors",
|
| 682 |
+
"model.visual.blocks.9.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 683 |
+
"model.visual.blocks.9.attn.qkv.bias": "model-00001-of-00002.safetensors",
|
| 684 |
+
"model.visual.blocks.9.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 685 |
+
"model.visual.blocks.9.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 686 |
+
"model.visual.blocks.9.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 687 |
+
"model.visual.blocks.9.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 688 |
+
"model.visual.blocks.9.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 689 |
+
"model.visual.blocks.9.norm1.bias": "model-00001-of-00002.safetensors",
|
| 690 |
+
"model.visual.blocks.9.norm1.weight": "model-00001-of-00002.safetensors",
|
| 691 |
+
"model.visual.blocks.9.norm2.bias": "model-00001-of-00002.safetensors",
|
| 692 |
+
"model.visual.blocks.9.norm2.weight": "model-00001-of-00002.safetensors",
|
| 693 |
+
"model.visual.deepstack_merger_list.0.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 694 |
+
"model.visual.deepstack_merger_list.0.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 695 |
+
"model.visual.deepstack_merger_list.0.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 696 |
+
"model.visual.deepstack_merger_list.0.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 697 |
+
"model.visual.deepstack_merger_list.0.norm.bias": "model-00001-of-00002.safetensors",
|
| 698 |
+
"model.visual.deepstack_merger_list.0.norm.weight": "model-00001-of-00002.safetensors",
|
| 699 |
+
"model.visual.deepstack_merger_list.1.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 700 |
+
"model.visual.deepstack_merger_list.1.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 701 |
+
"model.visual.deepstack_merger_list.1.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 702 |
+
"model.visual.deepstack_merger_list.1.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 703 |
+
"model.visual.deepstack_merger_list.1.norm.bias": "model-00001-of-00002.safetensors",
|
| 704 |
+
"model.visual.deepstack_merger_list.1.norm.weight": "model-00001-of-00002.safetensors",
|
| 705 |
+
"model.visual.deepstack_merger_list.2.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 706 |
+
"model.visual.deepstack_merger_list.2.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 707 |
+
"model.visual.deepstack_merger_list.2.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 708 |
+
"model.visual.deepstack_merger_list.2.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 709 |
+
"model.visual.deepstack_merger_list.2.norm.bias": "model-00001-of-00002.safetensors",
|
| 710 |
+
"model.visual.deepstack_merger_list.2.norm.weight": "model-00001-of-00002.safetensors",
|
| 711 |
+
"model.visual.merger.linear_fc1.bias": "model-00001-of-00002.safetensors",
|
| 712 |
+
"model.visual.merger.linear_fc1.weight": "model-00001-of-00002.safetensors",
|
| 713 |
+
"model.visual.merger.linear_fc2.bias": "model-00001-of-00002.safetensors",
|
| 714 |
+
"model.visual.merger.linear_fc2.weight": "model-00001-of-00002.safetensors",
|
| 715 |
+
"model.visual.merger.norm.bias": "model-00001-of-00002.safetensors",
|
| 716 |
+
"model.visual.merger.norm.weight": "model-00001-of-00002.safetensors",
|
| 717 |
+
"model.visual.patch_embed.proj.bias": "model-00001-of-00002.safetensors",
|
| 718 |
+
"model.visual.patch_embed.proj.weight": "model-00001-of-00002.safetensors",
|
| 719 |
+
"model.visual.pos_embed.weight": "model-00001-of-00002.safetensors"
|
| 720 |
+
}
|
| 721 |
+
}
|
checkpoint-200/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1cb7a4b7c226672e602829f949c9b949a2100554d0dc09e825c51a9143e6ccc3
|
| 3 |
+
size 16090226537
|
checkpoint-200/rng_state_0.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:14663a8b6302ff6f49bacc4b5a26646acf0712a3e1060b12030b8f15b965d9c0
|
| 3 |
+
size 15365
|
checkpoint-200/rng_state_1.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:76690382dbb28257944d46b11408a51ba6c0baac29e945c9d4090c25d5124dfc
|
| 3 |
+
size 15365
|
checkpoint-200/rng_state_2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ecec2988069a64d5c7b627c18f9347fcc2a9ccc3a29e32a2416f5901915e8aa9
|
| 3 |
+
size 15429
|
checkpoint-200/rng_state_3.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c99d727fea87ed28fe611f12e5764dca58acd2d7eeb6a17cddc30d9779311149
|
| 3 |
+
size 15429
|
checkpoint-200/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:58f22504a9a9649f4dc5ccb3e17d22d4cecfea1c126fee54296e90a97f833ed1
|
| 3 |
+
size 1465
|
checkpoint-200/special_tokens_map.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
"<|im_start|>",
|
| 4 |
+
"<|im_end|>",
|
| 5 |
+
"<|object_ref_start|>",
|
| 6 |
+
"<|object_ref_end|>",
|
| 7 |
+
"<|box_start|>",
|
| 8 |
+
"<|box_end|>",
|
| 9 |
+
"<|quad_start|>",
|
| 10 |
+
"<|quad_end|>",
|
| 11 |
+
"<|vision_start|>",
|
| 12 |
+
"<|vision_end|>",
|
| 13 |
+
"<|vision_pad|>",
|
| 14 |
+
"<|image_pad|>",
|
| 15 |
+
"<|video_pad|>"
|
| 16 |
+
],
|
| 17 |
+
"eos_token": {
|
| 18 |
+
"content": "<|im_end|>",
|
| 19 |
+
"lstrip": false,
|
| 20 |
+
"normalized": false,
|
| 21 |
+
"rstrip": false,
|
| 22 |
+
"single_word": false
|
| 23 |
+
},
|
| 24 |
+
"pad_token": {
|
| 25 |
+
"content": "<|endoftext|>",
|
| 26 |
+
"lstrip": false,
|
| 27 |
+
"normalized": false,
|
| 28 |
+
"rstrip": false,
|
| 29 |
+
"single_word": false
|
| 30 |
+
}
|
| 31 |
+
}
|
checkpoint-200/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:67cc0080ffd7555f723f423c27cfef314e1ad9d335c8b79f465c5faba1ed478b
|
| 3 |
+
size 11422821
|
checkpoint-200/tokenizer_config.json
ADDED
|
@@ -0,0 +1,244 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_bos_token": false,
|
| 3 |
+
"add_prefix_space": false,
|
| 4 |
+
"added_tokens_decoder": {
|
| 5 |
+
"151643": {
|
| 6 |
+
"content": "<|endoftext|>",
|
| 7 |
+
"lstrip": false,
|
| 8 |
+
"normalized": false,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false,
|
| 11 |
+
"special": true
|
| 12 |
+
},
|
| 13 |
+
"151644": {
|
| 14 |
+
"content": "<|im_start|>",
|
| 15 |
+
"lstrip": false,
|
| 16 |
+
"normalized": false,
|
| 17 |
+
"rstrip": false,
|
| 18 |
+
"single_word": false,
|
| 19 |
+
"special": true
|
| 20 |
+
},
|
| 21 |
+
"151645": {
|
| 22 |
+
"content": "<|im_end|>",
|
| 23 |
+
"lstrip": false,
|
| 24 |
+
"normalized": false,
|
| 25 |
+
"rstrip": false,
|
| 26 |
+
"single_word": false,
|
| 27 |
+
"special": true
|
| 28 |
+
},
|
| 29 |
+
"151646": {
|
| 30 |
+
"content": "<|object_ref_start|>",
|
| 31 |
+
"lstrip": false,
|
| 32 |
+
"normalized": false,
|
| 33 |
+
"rstrip": false,
|
| 34 |
+
"single_word": false,
|
| 35 |
+
"special": true
|
| 36 |
+
},
|
| 37 |
+
"151647": {
|
| 38 |
+
"content": "<|object_ref_end|>",
|
| 39 |
+
"lstrip": false,
|
| 40 |
+
"normalized": false,
|
| 41 |
+
"rstrip": false,
|
| 42 |
+
"single_word": false,
|
| 43 |
+
"special": true
|
| 44 |
+
},
|
| 45 |
+
"151648": {
|
| 46 |
+
"content": "<|box_start|>",
|
| 47 |
+
"lstrip": false,
|
| 48 |
+
"normalized": false,
|
| 49 |
+
"rstrip": false,
|
| 50 |
+
"single_word": false,
|
| 51 |
+
"special": true
|
| 52 |
+
},
|
| 53 |
+
"151649": {
|
| 54 |
+
"content": "<|box_end|>",
|
| 55 |
+
"lstrip": false,
|
| 56 |
+
"normalized": false,
|
| 57 |
+
"rstrip": false,
|
| 58 |
+
"single_word": false,
|
| 59 |
+
"special": true
|
| 60 |
+
},
|
| 61 |
+
"151650": {
|
| 62 |
+
"content": "<|quad_start|>",
|
| 63 |
+
"lstrip": false,
|
| 64 |
+
"normalized": false,
|
| 65 |
+
"rstrip": false,
|
| 66 |
+
"single_word": false,
|
| 67 |
+
"special": true
|
| 68 |
+
},
|
| 69 |
+
"151651": {
|
| 70 |
+
"content": "<|quad_end|>",
|
| 71 |
+
"lstrip": false,
|
| 72 |
+
"normalized": false,
|
| 73 |
+
"rstrip": false,
|
| 74 |
+
"single_word": false,
|
| 75 |
+
"special": true
|
| 76 |
+
},
|
| 77 |
+
"151652": {
|
| 78 |
+
"content": "<|vision_start|>",
|
| 79 |
+
"lstrip": false,
|
| 80 |
+
"normalized": false,
|
| 81 |
+
"rstrip": false,
|
| 82 |
+
"single_word": false,
|
| 83 |
+
"special": true
|
| 84 |
+
},
|
| 85 |
+
"151653": {
|
| 86 |
+
"content": "<|vision_end|>",
|
| 87 |
+
"lstrip": false,
|
| 88 |
+
"normalized": false,
|
| 89 |
+
"rstrip": false,
|
| 90 |
+
"single_word": false,
|
| 91 |
+
"special": true
|
| 92 |
+
},
|
| 93 |
+
"151654": {
|
| 94 |
+
"content": "<|vision_pad|>",
|
| 95 |
+
"lstrip": false,
|
| 96 |
+
"normalized": false,
|
| 97 |
+
"rstrip": false,
|
| 98 |
+
"single_word": false,
|
| 99 |
+
"special": true
|
| 100 |
+
},
|
| 101 |
+
"151655": {
|
| 102 |
+
"content": "<|image_pad|>",
|
| 103 |
+
"lstrip": false,
|
| 104 |
+
"normalized": false,
|
| 105 |
+
"rstrip": false,
|
| 106 |
+
"single_word": false,
|
| 107 |
+
"special": true
|
| 108 |
+
},
|
| 109 |
+
"151656": {
|
| 110 |
+
"content": "<|video_pad|>",
|
| 111 |
+
"lstrip": false,
|
| 112 |
+
"normalized": false,
|
| 113 |
+
"rstrip": false,
|
| 114 |
+
"single_word": false,
|
| 115 |
+
"special": true
|
| 116 |
+
},
|
| 117 |
+
"151657": {
|
| 118 |
+
"content": "<tool_call>",
|
| 119 |
+
"lstrip": false,
|
| 120 |
+
"normalized": false,
|
| 121 |
+
"rstrip": false,
|
| 122 |
+
"single_word": false,
|
| 123 |
+
"special": false
|
| 124 |
+
},
|
| 125 |
+
"151658": {
|
| 126 |
+
"content": "</tool_call>",
|
| 127 |
+
"lstrip": false,
|
| 128 |
+
"normalized": false,
|
| 129 |
+
"rstrip": false,
|
| 130 |
+
"single_word": false,
|
| 131 |
+
"special": false
|
| 132 |
+
},
|
| 133 |
+
"151659": {
|
| 134 |
+
"content": "<|fim_prefix|>",
|
| 135 |
+
"lstrip": false,
|
| 136 |
+
"normalized": false,
|
| 137 |
+
"rstrip": false,
|
| 138 |
+
"single_word": false,
|
| 139 |
+
"special": false
|
| 140 |
+
},
|
| 141 |
+
"151660": {
|
| 142 |
+
"content": "<|fim_middle|>",
|
| 143 |
+
"lstrip": false,
|
| 144 |
+
"normalized": false,
|
| 145 |
+
"rstrip": false,
|
| 146 |
+
"single_word": false,
|
| 147 |
+
"special": false
|
| 148 |
+
},
|
| 149 |
+
"151661": {
|
| 150 |
+
"content": "<|fim_suffix|>",
|
| 151 |
+
"lstrip": false,
|
| 152 |
+
"normalized": false,
|
| 153 |
+
"rstrip": false,
|
| 154 |
+
"single_word": false,
|
| 155 |
+
"special": false
|
| 156 |
+
},
|
| 157 |
+
"151662": {
|
| 158 |
+
"content": "<|fim_pad|>",
|
| 159 |
+
"lstrip": false,
|
| 160 |
+
"normalized": false,
|
| 161 |
+
"rstrip": false,
|
| 162 |
+
"single_word": false,
|
| 163 |
+
"special": false
|
| 164 |
+
},
|
| 165 |
+
"151663": {
|
| 166 |
+
"content": "<|repo_name|>",
|
| 167 |
+
"lstrip": false,
|
| 168 |
+
"normalized": false,
|
| 169 |
+
"rstrip": false,
|
| 170 |
+
"single_word": false,
|
| 171 |
+
"special": false
|
| 172 |
+
},
|
| 173 |
+
"151664": {
|
| 174 |
+
"content": "<|file_sep|>",
|
| 175 |
+
"lstrip": false,
|
| 176 |
+
"normalized": false,
|
| 177 |
+
"rstrip": false,
|
| 178 |
+
"single_word": false,
|
| 179 |
+
"special": false
|
| 180 |
+
},
|
| 181 |
+
"151665": {
|
| 182 |
+
"content": "<tool_response>",
|
| 183 |
+
"lstrip": false,
|
| 184 |
+
"normalized": false,
|
| 185 |
+
"rstrip": false,
|
| 186 |
+
"single_word": false,
|
| 187 |
+
"special": false
|
| 188 |
+
},
|
| 189 |
+
"151666": {
|
| 190 |
+
"content": "</tool_response>",
|
| 191 |
+
"lstrip": false,
|
| 192 |
+
"normalized": false,
|
| 193 |
+
"rstrip": false,
|
| 194 |
+
"single_word": false,
|
| 195 |
+
"special": false
|
| 196 |
+
},
|
| 197 |
+
"151667": {
|
| 198 |
+
"content": "<think>",
|
| 199 |
+
"lstrip": false,
|
| 200 |
+
"normalized": false,
|
| 201 |
+
"rstrip": false,
|
| 202 |
+
"single_word": false,
|
| 203 |
+
"special": false
|
| 204 |
+
},
|
| 205 |
+
"151668": {
|
| 206 |
+
"content": "</think>",
|
| 207 |
+
"lstrip": false,
|
| 208 |
+
"normalized": false,
|
| 209 |
+
"rstrip": false,
|
| 210 |
+
"single_word": false,
|
| 211 |
+
"special": false
|
| 212 |
+
}
|
| 213 |
+
},
|
| 214 |
+
"additional_special_tokens": [
|
| 215 |
+
"<|im_start|>",
|
| 216 |
+
"<|im_end|>",
|
| 217 |
+
"<|object_ref_start|>",
|
| 218 |
+
"<|object_ref_end|>",
|
| 219 |
+
"<|box_start|>",
|
| 220 |
+
"<|box_end|>",
|
| 221 |
+
"<|quad_start|>",
|
| 222 |
+
"<|quad_end|>",
|
| 223 |
+
"<|vision_start|>",
|
| 224 |
+
"<|vision_end|>",
|
| 225 |
+
"<|vision_pad|>",
|
| 226 |
+
"<|image_pad|>",
|
| 227 |
+
"<|video_pad|>"
|
| 228 |
+
],
|
| 229 |
+
"bos_token": null,
|
| 230 |
+
"clean_up_tokenization_spaces": false,
|
| 231 |
+
"eos_token": "<|im_end|>",
|
| 232 |
+
"errors": "replace",
|
| 233 |
+
"extra_special_tokens": {},
|
| 234 |
+
"max_length": null,
|
| 235 |
+
"model_max_length": 262144,
|
| 236 |
+
"pad_to_multiple_of": null,
|
| 237 |
+
"pad_token": "<|endoftext|>",
|
| 238 |
+
"pad_token_type_id": 0,
|
| 239 |
+
"padding_side": "left",
|
| 240 |
+
"processor_class": "Qwen3VLProcessor",
|
| 241 |
+
"split_special_tokens": false,
|
| 242 |
+
"tokenizer_class": "Qwen2Tokenizer",
|
| 243 |
+
"unk_token": null
|
| 244 |
+
}
|
checkpoint-200/trainer_state.json
ADDED
|
@@ -0,0 +1,650 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": null,
|
| 3 |
+
"best_metric": null,
|
| 4 |
+
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.042863266180882986,
|
| 6 |
+
"eval_steps": 72,
|
| 7 |
+
"global_step": 200,
|
| 8 |
+
"is_hyper_param_search": false,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"clip_ratio/high_max": 0.0,
|
| 14 |
+
"clip_ratio/high_mean": 0.0,
|
| 15 |
+
"clip_ratio/low_mean": 0.0,
|
| 16 |
+
"clip_ratio/low_min": 0.0,
|
| 17 |
+
"clip_ratio/region_mean": 0.0,
|
| 18 |
+
"completions/clipped_ratio": 0.0,
|
| 19 |
+
"completions/max_length": 728.3,
|
| 20 |
+
"completions/max_terminated_length": 728.3,
|
| 21 |
+
"completions/mean_length": 690.4875061035157,
|
| 22 |
+
"completions/mean_terminated_length": 690.4875061035157,
|
| 23 |
+
"completions/min_length": 651.7,
|
| 24 |
+
"completions/min_terminated_length": 651.7,
|
| 25 |
+
"entropy": 0.5155309438705444,
|
| 26 |
+
"epoch": 0.0021431633090441492,
|
| 27 |
+
"frac_reward_zero_std": 0.0,
|
| 28 |
+
"grad_norm": 0.953125,
|
| 29 |
+
"kl": 0.0009624507569242269,
|
| 30 |
+
"learning_rate": 9.691780821917808e-07,
|
| 31 |
+
"loss": -0.0006,
|
| 32 |
+
"num_tokens": 997482.0,
|
| 33 |
+
"reward": 0.017473320267163216,
|
| 34 |
+
"reward_std": 0.03684660438448191,
|
| 35 |
+
"rewards/ADEnReward/mean": 0.017473319987766446,
|
| 36 |
+
"rewards/ADEnReward/std": 0.04930391618981957,
|
| 37 |
+
"step": 10,
|
| 38 |
+
"step_time": 30.737299674004316
|
| 39 |
+
},
|
| 40 |
+
{
|
| 41 |
+
"clip_ratio/high_max": 0.0,
|
| 42 |
+
"clip_ratio/high_mean": 0.0,
|
| 43 |
+
"clip_ratio/low_mean": 0.0,
|
| 44 |
+
"clip_ratio/low_min": 0.0,
|
| 45 |
+
"clip_ratio/region_mean": 0.0,
|
| 46 |
+
"completions/clipped_ratio": 0.0,
|
| 47 |
+
"completions/max_length": 726.3,
|
| 48 |
+
"completions/max_terminated_length": 726.3,
|
| 49 |
+
"completions/mean_length": 689.114599609375,
|
| 50 |
+
"completions/mean_terminated_length": 689.114599609375,
|
| 51 |
+
"completions/min_length": 654.4,
|
| 52 |
+
"completions/min_terminated_length": 654.4,
|
| 53 |
+
"entropy": 0.5154049336910248,
|
| 54 |
+
"epoch": 0.0042863266180882984,
|
| 55 |
+
"frac_reward_zero_std": 0.0,
|
| 56 |
+
"grad_norm": 1.0859375,
|
| 57 |
+
"kl": 0.001545107248239219,
|
| 58 |
+
"learning_rate": 9.349315068493149e-07,
|
| 59 |
+
"loss": -0.002,
|
| 60 |
+
"num_tokens": 1994385.0,
|
| 61 |
+
"reward": 0.02515518048312515,
|
| 62 |
+
"reward_std": 0.051526497805025426,
|
| 63 |
+
"rewards/ADEnReward/mean": 0.025155179359717294,
|
| 64 |
+
"rewards/ADEnReward/std": 0.06882240404374898,
|
| 65 |
+
"step": 20,
|
| 66 |
+
"step_time": 30.774630016833544
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"clip_ratio/high_max": 0.0,
|
| 70 |
+
"clip_ratio/high_mean": 0.0,
|
| 71 |
+
"clip_ratio/low_mean": 0.0,
|
| 72 |
+
"clip_ratio/low_min": 0.0,
|
| 73 |
+
"clip_ratio/region_mean": 0.0,
|
| 74 |
+
"completions/clipped_ratio": 0.0,
|
| 75 |
+
"completions/max_length": 727.7,
|
| 76 |
+
"completions/max_terminated_length": 727.7,
|
| 77 |
+
"completions/mean_length": 687.0104431152344,
|
| 78 |
+
"completions/mean_terminated_length": 687.0104431152344,
|
| 79 |
+
"completions/min_length": 647.7,
|
| 80 |
+
"completions/min_terminated_length": 647.7,
|
| 81 |
+
"entropy": 0.5073866218328476,
|
| 82 |
+
"epoch": 0.006429489927132447,
|
| 83 |
+
"frac_reward_zero_std": 0.0,
|
| 84 |
+
"grad_norm": 0.84375,
|
| 85 |
+
"kl": 0.00105394265265204,
|
| 86 |
+
"learning_rate": 9.006849315068494e-07,
|
| 87 |
+
"loss": -0.001,
|
| 88 |
+
"num_tokens": 2989654.0,
|
| 89 |
+
"reward": 0.02013225699774921,
|
| 90 |
+
"reward_std": 0.039358591521158816,
|
| 91 |
+
"rewards/ADEnReward/mean": 0.020132256811484693,
|
| 92 |
+
"rewards/ADEnReward/std": 0.054149537533521655,
|
| 93 |
+
"step": 30,
|
| 94 |
+
"step_time": 30.921874072402716
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
"clip_ratio/high_max": 0.0,
|
| 98 |
+
"clip_ratio/high_mean": 0.0,
|
| 99 |
+
"clip_ratio/low_mean": 0.0,
|
| 100 |
+
"clip_ratio/low_min": 0.0,
|
| 101 |
+
"clip_ratio/region_mean": 0.0,
|
| 102 |
+
"completions/clipped_ratio": 0.0,
|
| 103 |
+
"completions/max_length": 730.3,
|
| 104 |
+
"completions/max_terminated_length": 730.3,
|
| 105 |
+
"completions/mean_length": 691.108349609375,
|
| 106 |
+
"completions/mean_terminated_length": 691.108349609375,
|
| 107 |
+
"completions/min_length": 655.2,
|
| 108 |
+
"completions/min_terminated_length": 655.2,
|
| 109 |
+
"entropy": 0.5206563144922256,
|
| 110 |
+
"epoch": 0.008572653236176597,
|
| 111 |
+
"frac_reward_zero_std": 0.0,
|
| 112 |
+
"grad_norm": 0.8515625,
|
| 113 |
+
"kl": 0.001020824775332585,
|
| 114 |
+
"learning_rate": 8.664383561643836e-07,
|
| 115 |
+
"loss": -0.0003,
|
| 116 |
+
"num_tokens": 3987498.0,
|
| 117 |
+
"reward": 0.022847729618661106,
|
| 118 |
+
"reward_std": 0.05116785345599055,
|
| 119 |
+
"rewards/ADEnReward/mean": 0.022847728175111114,
|
| 120 |
+
"rewards/ADEnReward/std": 0.06853441912680865,
|
| 121 |
+
"step": 40,
|
| 122 |
+
"step_time": 30.637446269392967
|
| 123 |
+
},
|
| 124 |
+
{
|
| 125 |
+
"clip_ratio/high_max": 0.0,
|
| 126 |
+
"clip_ratio/high_mean": 0.0,
|
| 127 |
+
"clip_ratio/low_mean": 0.0,
|
| 128 |
+
"clip_ratio/low_min": 0.0,
|
| 129 |
+
"clip_ratio/region_mean": 0.0,
|
| 130 |
+
"completions/clipped_ratio": 0.0,
|
| 131 |
+
"completions/max_length": 727.2,
|
| 132 |
+
"completions/max_terminated_length": 727.2,
|
| 133 |
+
"completions/mean_length": 687.1041870117188,
|
| 134 |
+
"completions/mean_terminated_length": 687.1041870117188,
|
| 135 |
+
"completions/min_length": 655.1,
|
| 136 |
+
"completions/min_terminated_length": 655.1,
|
| 137 |
+
"entropy": 0.5212983191013336,
|
| 138 |
+
"epoch": 0.010715816545220747,
|
| 139 |
+
"frac_reward_zero_std": 0.0,
|
| 140 |
+
"grad_norm": 1.0,
|
| 141 |
+
"kl": 0.0006436160067096353,
|
| 142 |
+
"learning_rate": 8.321917808219178e-07,
|
| 143 |
+
"loss": -0.0,
|
| 144 |
+
"num_tokens": 4983244.0,
|
| 145 |
+
"reward": 0.02881853673607111,
|
| 146 |
+
"reward_std": 0.06065227556973696,
|
| 147 |
+
"rewards/ADEnReward/mean": 0.028818535897880793,
|
| 148 |
+
"rewards/ADEnReward/std": 0.07447688719257713,
|
| 149 |
+
"step": 50,
|
| 150 |
+
"step_time": 30.621672691404818
|
| 151 |
+
},
|
| 152 |
+
{
|
| 153 |
+
"clip_ratio/high_max": 0.0,
|
| 154 |
+
"clip_ratio/high_mean": 0.0,
|
| 155 |
+
"clip_ratio/low_mean": 0.0,
|
| 156 |
+
"clip_ratio/low_min": 0.0,
|
| 157 |
+
"clip_ratio/region_mean": 0.0,
|
| 158 |
+
"completions/clipped_ratio": 0.0,
|
| 159 |
+
"completions/max_length": 720.9,
|
| 160 |
+
"completions/max_terminated_length": 720.9,
|
| 161 |
+
"completions/mean_length": 689.1104431152344,
|
| 162 |
+
"completions/mean_terminated_length": 689.1104431152344,
|
| 163 |
+
"completions/min_length": 658.7,
|
| 164 |
+
"completions/min_terminated_length": 658.7,
|
| 165 |
+
"entropy": 0.5353182137012482,
|
| 166 |
+
"epoch": 0.012858979854264894,
|
| 167 |
+
"frac_reward_zero_std": 0.0,
|
| 168 |
+
"grad_norm": 0.88671875,
|
| 169 |
+
"kl": 0.0006802360760048032,
|
| 170 |
+
"learning_rate": 7.97945205479452e-07,
|
| 171 |
+
"loss": 0.0004,
|
| 172 |
+
"num_tokens": 5979953.0,
|
| 173 |
+
"reward": 0.021031666733324527,
|
| 174 |
+
"reward_std": 0.04476789850741625,
|
| 175 |
+
"rewards/ADEnReward/mean": 0.02103166626766324,
|
| 176 |
+
"rewards/ADEnReward/std": 0.060725963488221166,
|
| 177 |
+
"step": 60,
|
| 178 |
+
"step_time": 30.693003302812578
|
| 179 |
+
},
|
| 180 |
+
{
|
| 181 |
+
"clip_ratio/high_max": 0.0,
|
| 182 |
+
"clip_ratio/high_mean": 0.0,
|
| 183 |
+
"clip_ratio/low_mean": 0.0,
|
| 184 |
+
"clip_ratio/low_min": 0.0,
|
| 185 |
+
"clip_ratio/region_mean": 0.0,
|
| 186 |
+
"completions/clipped_ratio": 0.0,
|
| 187 |
+
"completions/max_length": 729.1,
|
| 188 |
+
"completions/max_terminated_length": 729.1,
|
| 189 |
+
"completions/mean_length": 690.0625183105469,
|
| 190 |
+
"completions/mean_terminated_length": 690.0625183105469,
|
| 191 |
+
"completions/min_length": 655.5,
|
| 192 |
+
"completions/min_terminated_length": 655.5,
|
| 193 |
+
"entropy": 0.5152539879083633,
|
| 194 |
+
"epoch": 0.015002143163309044,
|
| 195 |
+
"frac_reward_zero_std": 0.0,
|
| 196 |
+
"grad_norm": 0.71875,
|
| 197 |
+
"kl": 0.0006728454434778541,
|
| 198 |
+
"learning_rate": 7.636986301369863e-07,
|
| 199 |
+
"loss": 0.0006,
|
| 200 |
+
"num_tokens": 6977375.0,
|
| 201 |
+
"reward": 0.02499701709020883,
|
| 202 |
+
"reward_std": 0.052732100058346984,
|
| 203 |
+
"rewards/ADEnReward/mean": 0.02499701699707657,
|
| 204 |
+
"rewards/ADEnReward/std": 0.06445601325249299,
|
| 205 |
+
"step": 70,
|
| 206 |
+
"step_time": 30.69759728834033
|
| 207 |
+
},
|
| 208 |
+
{
|
| 209 |
+
"epoch": 0.015430775825117874,
|
| 210 |
+
"eval_clip_ratio/high_max": 0.0,
|
| 211 |
+
"eval_clip_ratio/high_mean": 0.0,
|
| 212 |
+
"eval_clip_ratio/low_mean": 0.0,
|
| 213 |
+
"eval_clip_ratio/low_min": 0.0,
|
| 214 |
+
"eval_clip_ratio/region_mean": 0.0,
|
| 215 |
+
"eval_completions/clipped_ratio": 0.0003109452828987321,
|
| 216 |
+
"eval_completions/max_length": 734.5223880597015,
|
| 217 |
+
"eval_completions/max_terminated_length": 730.1343283582089,
|
| 218 |
+
"eval_completions/mean_length": 691.7577942805503,
|
| 219 |
+
"eval_completions/mean_terminated_length": 691.6533622172342,
|
| 220 |
+
"eval_completions/min_length": 655.8805970149253,
|
| 221 |
+
"eval_completions/min_terminated_length": 655.8805970149253,
|
| 222 |
+
"eval_entropy": 0.5390982583387575,
|
| 223 |
+
"eval_frac_reward_zero_std": 0.009950249052759427,
|
| 224 |
+
"eval_kl": 0.0007209435779490132,
|
| 225 |
+
"eval_loss": -0.0006809970363974571,
|
| 226 |
+
"eval_num_tokens": 7175890.0,
|
| 227 |
+
"eval_reward": 0.014476228026245763,
|
| 228 |
+
"eval_reward_std": 0.02974726758991132,
|
| 229 |
+
"eval_rewards/ADEnReward/mean": 0.01447622790266308,
|
| 230 |
+
"eval_rewards/ADEnReward/std": 0.04060708046908171,
|
| 231 |
+
"eval_runtime": 1940.8413,
|
| 232 |
+
"eval_samples_per_second": 0.103,
|
| 233 |
+
"eval_steps_per_second": 0.003,
|
| 234 |
+
"step": 72
|
| 235 |
+
},
|
| 236 |
+
{
|
| 237 |
+
"clip_ratio/high_max": 0.0,
|
| 238 |
+
"clip_ratio/high_mean": 0.0,
|
| 239 |
+
"clip_ratio/low_mean": 0.0,
|
| 240 |
+
"clip_ratio/low_min": 0.0,
|
| 241 |
+
"clip_ratio/region_mean": 0.0,
|
| 242 |
+
"completions/clipped_ratio": 0.0,
|
| 243 |
+
"completions/max_length": 724.8,
|
| 244 |
+
"completions/max_terminated_length": 724.8,
|
| 245 |
+
"completions/mean_length": 685.8041809082031,
|
| 246 |
+
"completions/mean_terminated_length": 685.8041809082031,
|
| 247 |
+
"completions/min_length": 646.9,
|
| 248 |
+
"completions/min_terminated_length": 646.9,
|
| 249 |
+
"entropy": 0.5170553356409073,
|
| 250 |
+
"epoch": 0.017145306472353194,
|
| 251 |
+
"frac_reward_zero_std": 0.0,
|
| 252 |
+
"grad_norm": 0.76953125,
|
| 253 |
+
"kl": 0.0006296096777077764,
|
| 254 |
+
"learning_rate": 7.294520547945205e-07,
|
| 255 |
+
"loss": -0.0001,
|
| 256 |
+
"num_tokens": 7972113.0,
|
| 257 |
+
"reward": 0.027504462655633687,
|
| 258 |
+
"reward_std": 0.05410240553319454,
|
| 259 |
+
"rewards/ADEnReward/mean": 0.027504462469369172,
|
| 260 |
+
"rewards/ADEnReward/std": 0.07011906299740076,
|
| 261 |
+
"step": 80,
|
| 262 |
+
"step_time": 29.781939852237702
|
| 263 |
+
},
|
| 264 |
+
{
|
| 265 |
+
"clip_ratio/high_max": 0.0,
|
| 266 |
+
"clip_ratio/high_mean": 0.0,
|
| 267 |
+
"clip_ratio/low_mean": 0.0,
|
| 268 |
+
"clip_ratio/low_min": 0.0,
|
| 269 |
+
"clip_ratio/region_mean": 0.0,
|
| 270 |
+
"completions/clipped_ratio": 0.0,
|
| 271 |
+
"completions/max_length": 729.1,
|
| 272 |
+
"completions/max_terminated_length": 729.1,
|
| 273 |
+
"completions/mean_length": 692.3312622070313,
|
| 274 |
+
"completions/mean_terminated_length": 692.3312622070313,
|
| 275 |
+
"completions/min_length": 661.3,
|
| 276 |
+
"completions/min_terminated_length": 661.3,
|
| 277 |
+
"entropy": 0.5159202754497528,
|
| 278 |
+
"epoch": 0.01928846978139734,
|
| 279 |
+
"frac_reward_zero_std": 0.0,
|
| 280 |
+
"grad_norm": 0.95703125,
|
| 281 |
+
"kl": 0.0007557698409073055,
|
| 282 |
+
"learning_rate": 6.952054794520548e-07,
|
| 283 |
+
"loss": 0.0003,
|
| 284 |
+
"num_tokens": 8971120.0,
|
| 285 |
+
"reward": 0.031002284376882017,
|
| 286 |
+
"reward_std": 0.058332843240350485,
|
| 287 |
+
"rewards/ADEnReward/mean": 0.03100228389375843,
|
| 288 |
+
"rewards/ADEnReward/std": 0.06421787270810456,
|
| 289 |
+
"step": 90,
|
| 290 |
+
"step_time": 29.85468419864774
|
| 291 |
+
},
|
| 292 |
+
{
|
| 293 |
+
"clip_ratio/high_max": 0.0,
|
| 294 |
+
"clip_ratio/high_mean": 0.0,
|
| 295 |
+
"clip_ratio/low_mean": 0.0,
|
| 296 |
+
"clip_ratio/low_min": 0.0,
|
| 297 |
+
"clip_ratio/region_mean": 0.0,
|
| 298 |
+
"completions/clipped_ratio": 0.0,
|
| 299 |
+
"completions/max_length": 728.2,
|
| 300 |
+
"completions/max_terminated_length": 728.2,
|
| 301 |
+
"completions/mean_length": 689.6791931152344,
|
| 302 |
+
"completions/mean_terminated_length": 689.6791931152344,
|
| 303 |
+
"completions/min_length": 655.0,
|
| 304 |
+
"completions/min_terminated_length": 655.0,
|
| 305 |
+
"entropy": 0.524771922826767,
|
| 306 |
+
"epoch": 0.021431633090441493,
|
| 307 |
+
"frac_reward_zero_std": 0.0,
|
| 308 |
+
"grad_norm": 1.375,
|
| 309 |
+
"kl": 0.000729308504378423,
|
| 310 |
+
"learning_rate": 6.60958904109589e-07,
|
| 311 |
+
"loss": -0.0009,
|
| 312 |
+
"num_tokens": 9968310.0,
|
| 313 |
+
"reward": 0.03060391815379262,
|
| 314 |
+
"reward_std": 0.04884043680503965,
|
| 315 |
+
"rewards/ADEnReward/mean": 0.03060391787439585,
|
| 316 |
+
"rewards/ADEnReward/std": 0.07001540651544928,
|
| 317 |
+
"step": 100,
|
| 318 |
+
"step_time": 29.800767001509666
|
| 319 |
+
},
|
| 320 |
+
{
|
| 321 |
+
"clip_ratio/high_max": 0.0,
|
| 322 |
+
"clip_ratio/high_mean": 0.0,
|
| 323 |
+
"clip_ratio/low_mean": 0.0,
|
| 324 |
+
"clip_ratio/low_min": 0.0,
|
| 325 |
+
"clip_ratio/region_mean": 0.0,
|
| 326 |
+
"completions/clipped_ratio": 0.0,
|
| 327 |
+
"completions/max_length": 730.6,
|
| 328 |
+
"completions/max_terminated_length": 730.6,
|
| 329 |
+
"completions/mean_length": 689.6666809082031,
|
| 330 |
+
"completions/mean_terminated_length": 689.6666809082031,
|
| 331 |
+
"completions/min_length": 650.4,
|
| 332 |
+
"completions/min_terminated_length": 650.4,
|
| 333 |
+
"entropy": 0.5282228320837021,
|
| 334 |
+
"epoch": 0.02357479639948564,
|
| 335 |
+
"frac_reward_zero_std": 0.0,
|
| 336 |
+
"grad_norm": 0.8984375,
|
| 337 |
+
"kl": 0.0007196089718490839,
|
| 338 |
+
"learning_rate": 6.267123287671232e-07,
|
| 339 |
+
"loss": -0.001,
|
| 340 |
+
"num_tokens": 10965894.0,
|
| 341 |
+
"reward": 0.01836783888284117,
|
| 342 |
+
"reward_std": 0.03968090345151722,
|
| 343 |
+
"rewards/ADEnReward/mean": 0.018367838417179882,
|
| 344 |
+
"rewards/ADEnReward/std": 0.05097979507409036,
|
| 345 |
+
"step": 110,
|
| 346 |
+
"step_time": 29.778396278619766
|
| 347 |
+
},
|
| 348 |
+
{
|
| 349 |
+
"clip_ratio/high_max": 0.0,
|
| 350 |
+
"clip_ratio/high_mean": 0.0,
|
| 351 |
+
"clip_ratio/low_mean": 0.0,
|
| 352 |
+
"clip_ratio/low_min": 0.0,
|
| 353 |
+
"clip_ratio/region_mean": 0.0,
|
| 354 |
+
"completions/clipped_ratio": 0.0,
|
| 355 |
+
"completions/max_length": 723.2,
|
| 356 |
+
"completions/max_terminated_length": 723.2,
|
| 357 |
+
"completions/mean_length": 685.7271057128906,
|
| 358 |
+
"completions/mean_terminated_length": 685.7271057128906,
|
| 359 |
+
"completions/min_length": 645.2,
|
| 360 |
+
"completions/min_terminated_length": 645.2,
|
| 361 |
+
"entropy": 0.5005072504281998,
|
| 362 |
+
"epoch": 0.02571795970852979,
|
| 363 |
+
"frac_reward_zero_std": 0.0,
|
| 364 |
+
"grad_norm": 0.98046875,
|
| 365 |
+
"kl": 0.0006541690701851622,
|
| 366 |
+
"learning_rate": 5.924657534246575e-07,
|
| 367 |
+
"loss": -0.0007,
|
| 368 |
+
"num_tokens": 11960243.0,
|
| 369 |
+
"reward": 0.023174144513905048,
|
| 370 |
+
"reward_std": 0.04275461677461863,
|
| 371 |
+
"rewards/ADEnReward/mean": 0.023174144234508275,
|
| 372 |
+
"rewards/ADEnReward/std": 0.05846338244155049,
|
| 373 |
+
"step": 120,
|
| 374 |
+
"step_time": 30.732146510481833
|
| 375 |
+
},
|
| 376 |
+
{
|
| 377 |
+
"clip_ratio/high_max": 0.0,
|
| 378 |
+
"clip_ratio/high_mean": 0.0,
|
| 379 |
+
"clip_ratio/low_mean": 0.0,
|
| 380 |
+
"clip_ratio/low_min": 0.0,
|
| 381 |
+
"clip_ratio/region_mean": 0.0,
|
| 382 |
+
"completions/clipped_ratio": 0.0,
|
| 383 |
+
"completions/max_length": 730.1,
|
| 384 |
+
"completions/max_terminated_length": 730.1,
|
| 385 |
+
"completions/mean_length": 688.1521057128906,
|
| 386 |
+
"completions/mean_terminated_length": 688.1521057128906,
|
| 387 |
+
"completions/min_length": 650.5,
|
| 388 |
+
"completions/min_terminated_length": 650.5,
|
| 389 |
+
"entropy": 0.5175260335206986,
|
| 390 |
+
"epoch": 0.02786112301757394,
|
| 391 |
+
"frac_reward_zero_std": 0.0,
|
| 392 |
+
"grad_norm": 0.84765625,
|
| 393 |
+
"kl": 0.0007244604057632387,
|
| 394 |
+
"learning_rate": 5.582191780821918e-07,
|
| 395 |
+
"loss": -0.0007,
|
| 396 |
+
"num_tokens": 12956332.0,
|
| 397 |
+
"reward": 0.023879510536789893,
|
| 398 |
+
"reward_std": 0.05093174697831273,
|
| 399 |
+
"rewards/ADEnReward/mean": 0.023879510583356023,
|
| 400 |
+
"rewards/ADEnReward/std": 0.06644700225442648,
|
| 401 |
+
"step": 130,
|
| 402 |
+
"step_time": 30.819495313614606
|
| 403 |
+
},
|
| 404 |
+
{
|
| 405 |
+
"clip_ratio/high_max": 0.0,
|
| 406 |
+
"clip_ratio/high_mean": 0.0,
|
| 407 |
+
"clip_ratio/low_mean": 0.0,
|
| 408 |
+
"clip_ratio/low_min": 0.0,
|
| 409 |
+
"clip_ratio/region_mean": 0.0,
|
| 410 |
+
"completions/clipped_ratio": 0.0,
|
| 411 |
+
"completions/max_length": 727.9,
|
| 412 |
+
"completions/max_terminated_length": 727.9,
|
| 413 |
+
"completions/mean_length": 690.083349609375,
|
| 414 |
+
"completions/mean_terminated_length": 690.083349609375,
|
| 415 |
+
"completions/min_length": 650.7,
|
| 416 |
+
"completions/min_terminated_length": 650.7,
|
| 417 |
+
"entropy": 0.5213184654712677,
|
| 418 |
+
"epoch": 0.03000428632661809,
|
| 419 |
+
"frac_reward_zero_std": 0.0,
|
| 420 |
+
"grad_norm": 1.0078125,
|
| 421 |
+
"kl": 0.0007580458710435778,
|
| 422 |
+
"learning_rate": 5.23972602739726e-07,
|
| 423 |
+
"loss": -0.0001,
|
| 424 |
+
"num_tokens": 13953844.0,
|
| 425 |
+
"reward": 0.022881961753591896,
|
| 426 |
+
"reward_std": 0.045093757659196855,
|
| 427 |
+
"rewards/ADEnReward/mean": 0.022881961474195123,
|
| 428 |
+
"rewards/ADEnReward/std": 0.06383953038603067,
|
| 429 |
+
"step": 140,
|
| 430 |
+
"step_time": 30.626396916806698
|
| 431 |
+
},
|
| 432 |
+
{
|
| 433 |
+
"epoch": 0.030861551650235748,
|
| 434 |
+
"eval_clip_ratio/high_max": 0.0,
|
| 435 |
+
"eval_clip_ratio/high_mean": 0.0,
|
| 436 |
+
"eval_clip_ratio/low_mean": 0.0,
|
| 437 |
+
"eval_clip_ratio/low_min": 0.0,
|
| 438 |
+
"eval_clip_ratio/region_mean": 0.0,
|
| 439 |
+
"eval_completions/clipped_ratio": 0.0,
|
| 440 |
+
"eval_completions/max_length": 732.3134328358209,
|
| 441 |
+
"eval_completions/max_terminated_length": 732.3134328358209,
|
| 442 |
+
"eval_completions/mean_length": 691.8230955209305,
|
| 443 |
+
"eval_completions/mean_terminated_length": 691.8230955209305,
|
| 444 |
+
"eval_completions/min_length": 654.9850746268656,
|
| 445 |
+
"eval_completions/min_terminated_length": 654.9850746268656,
|
| 446 |
+
"eval_entropy": 0.5369591784121385,
|
| 447 |
+
"eval_frac_reward_zero_std": 0.01492537357913914,
|
| 448 |
+
"eval_kl": 0.000793086929566491,
|
| 449 |
+
"eval_loss": -0.0007029849803075194,
|
| 450 |
+
"eval_num_tokens": 14351729.0,
|
| 451 |
+
"eval_reward": 0.013461394588212803,
|
| 452 |
+
"eval_reward_std": 0.02743199842273077,
|
| 453 |
+
"eval_rewards/ADEnReward/mean": 0.013461394259165763,
|
| 454 |
+
"eval_rewards/ADEnReward/std": 0.03877570112835296,
|
| 455 |
+
"eval_runtime": 1948.8306,
|
| 456 |
+
"eval_samples_per_second": 0.103,
|
| 457 |
+
"eval_steps_per_second": 0.003,
|
| 458 |
+
"step": 144
|
| 459 |
+
},
|
| 460 |
+
{
|
| 461 |
+
"clip_ratio/high_max": 0.0,
|
| 462 |
+
"clip_ratio/high_mean": 0.0,
|
| 463 |
+
"clip_ratio/low_mean": 0.0,
|
| 464 |
+
"clip_ratio/low_min": 0.0,
|
| 465 |
+
"clip_ratio/region_mean": 0.0,
|
| 466 |
+
"completions/clipped_ratio": 0.0,
|
| 467 |
+
"completions/max_length": 723.0,
|
| 468 |
+
"completions/max_terminated_length": 723.0,
|
| 469 |
+
"completions/mean_length": 687.202099609375,
|
| 470 |
+
"completions/mean_terminated_length": 687.202099609375,
|
| 471 |
+
"completions/min_length": 654.2,
|
| 472 |
+
"completions/min_terminated_length": 654.2,
|
| 473 |
+
"entropy": 0.5220573782920838,
|
| 474 |
+
"epoch": 0.03214744963566224,
|
| 475 |
+
"frac_reward_zero_std": 0.0,
|
| 476 |
+
"grad_norm": 0.9765625,
|
| 477 |
+
"kl": 0.0007521495630498975,
|
| 478 |
+
"learning_rate": 4.897260273972603e-07,
|
| 479 |
+
"loss": -0.0005,
|
| 480 |
+
"num_tokens": 14949749.0,
|
| 481 |
+
"reward": 0.025187284033745528,
|
| 482 |
+
"reward_std": 0.050345284026116134,
|
| 483 |
+
"rewards/ADEnReward/mean": 0.02518728272989392,
|
| 484 |
+
"rewards/ADEnReward/std": 0.062126817740499975,
|
| 485 |
+
"step": 150,
|
| 486 |
+
"step_time": 29.455334320664406
|
| 487 |
+
},
|
| 488 |
+
{
|
| 489 |
+
"clip_ratio/high_max": 0.0,
|
| 490 |
+
"clip_ratio/high_mean": 0.0,
|
| 491 |
+
"clip_ratio/low_mean": 0.0,
|
| 492 |
+
"clip_ratio/low_min": 0.0,
|
| 493 |
+
"clip_ratio/region_mean": 0.0,
|
| 494 |
+
"completions/clipped_ratio": 0.0,
|
| 495 |
+
"completions/max_length": 730.8,
|
| 496 |
+
"completions/max_terminated_length": 730.8,
|
| 497 |
+
"completions/mean_length": 690.1666809082031,
|
| 498 |
+
"completions/mean_terminated_length": 690.1666809082031,
|
| 499 |
+
"completions/min_length": 656.0,
|
| 500 |
+
"completions/min_terminated_length": 656.0,
|
| 501 |
+
"entropy": 0.5066380023956298,
|
| 502 |
+
"epoch": 0.03429061294470639,
|
| 503 |
+
"frac_reward_zero_std": 0.0,
|
| 504 |
+
"grad_norm": 0.87109375,
|
| 505 |
+
"kl": 0.000771551247453317,
|
| 506 |
+
"learning_rate": 4.554794520547945e-07,
|
| 507 |
+
"loss": -0.0006,
|
| 508 |
+
"num_tokens": 15947205.0,
|
| 509 |
+
"reward": 0.026677155029028655,
|
| 510 |
+
"reward_std": 0.054913098365068434,
|
| 511 |
+
"rewards/ADEnReward/mean": 0.026677154656499624,
|
| 512 |
+
"rewards/ADEnReward/std": 0.07184574082493782,
|
| 513 |
+
"step": 160,
|
| 514 |
+
"step_time": 30.49247597977519
|
| 515 |
+
},
|
| 516 |
+
{
|
| 517 |
+
"clip_ratio/high_max": 0.0,
|
| 518 |
+
"clip_ratio/high_mean": 0.0,
|
| 519 |
+
"clip_ratio/low_mean": 0.0,
|
| 520 |
+
"clip_ratio/low_min": 0.0,
|
| 521 |
+
"clip_ratio/region_mean": 0.0,
|
| 522 |
+
"completions/clipped_ratio": 0.0,
|
| 523 |
+
"completions/max_length": 725.1,
|
| 524 |
+
"completions/max_terminated_length": 725.1,
|
| 525 |
+
"completions/mean_length": 688.3104309082031,
|
| 526 |
+
"completions/mean_terminated_length": 688.3104309082031,
|
| 527 |
+
"completions/min_length": 653.2,
|
| 528 |
+
"completions/min_terminated_length": 653.2,
|
| 529 |
+
"entropy": 0.5117672741413116,
|
| 530 |
+
"epoch": 0.036433776253750536,
|
| 531 |
+
"frac_reward_zero_std": 0.0,
|
| 532 |
+
"grad_norm": 1.125,
|
| 533 |
+
"kl": 0.0007626405160408467,
|
| 534 |
+
"learning_rate": 4.212328767123288e-07,
|
| 535 |
+
"loss": 0.0002,
|
| 536 |
+
"num_tokens": 16943306.0,
|
| 537 |
+
"reward": 0.024906267691403628,
|
| 538 |
+
"reward_std": 0.0546476511284709,
|
| 539 |
+
"rewards/ADEnReward/mean": 0.024906267132610082,
|
| 540 |
+
"rewards/ADEnReward/std": 0.06996704936027527,
|
| 541 |
+
"step": 170,
|
| 542 |
+
"step_time": 30.647633142769337
|
| 543 |
+
},
|
| 544 |
+
{
|
| 545 |
+
"clip_ratio/high_max": 0.0,
|
| 546 |
+
"clip_ratio/high_mean": 0.0,
|
| 547 |
+
"clip_ratio/low_mean": 0.0,
|
| 548 |
+
"clip_ratio/low_min": 0.0,
|
| 549 |
+
"clip_ratio/region_mean": 0.0,
|
| 550 |
+
"completions/clipped_ratio": 0.0,
|
| 551 |
+
"completions/max_length": 724.6,
|
| 552 |
+
"completions/max_terminated_length": 724.6,
|
| 553 |
+
"completions/mean_length": 688.0041809082031,
|
| 554 |
+
"completions/mean_terminated_length": 688.0041809082031,
|
| 555 |
+
"completions/min_length": 652.1,
|
| 556 |
+
"completions/min_terminated_length": 652.1,
|
| 557 |
+
"entropy": 0.5105538904666901,
|
| 558 |
+
"epoch": 0.03857693956279468,
|
| 559 |
+
"frac_reward_zero_std": 0.0,
|
| 560 |
+
"grad_norm": 1.1484375,
|
| 561 |
+
"kl": 0.0007595276751089841,
|
| 562 |
+
"learning_rate": 3.86986301369863e-07,
|
| 563 |
+
"loss": -0.0003,
|
| 564 |
+
"num_tokens": 17939564.0,
|
| 565 |
+
"reward": 0.024842281267046927,
|
| 566 |
+
"reward_std": 0.05068598166108131,
|
| 567 |
+
"rewards/ADEnReward/mean": 0.02484228080138564,
|
| 568 |
+
"rewards/ADEnReward/std": 0.0639550257474184,
|
| 569 |
+
"step": 180,
|
| 570 |
+
"step_time": 30.279158672690393
|
| 571 |
+
},
|
| 572 |
+
{
|
| 573 |
+
"clip_ratio/high_max": 0.0,
|
| 574 |
+
"clip_ratio/high_mean": 0.0,
|
| 575 |
+
"clip_ratio/low_mean": 0.0,
|
| 576 |
+
"clip_ratio/low_min": 0.0,
|
| 577 |
+
"clip_ratio/region_mean": 0.0,
|
| 578 |
+
"completions/clipped_ratio": 0.0,
|
| 579 |
+
"completions/max_length": 734.9,
|
| 580 |
+
"completions/max_terminated_length": 734.9,
|
| 581 |
+
"completions/mean_length": 690.4791809082031,
|
| 582 |
+
"completions/mean_terminated_length": 690.4791809082031,
|
| 583 |
+
"completions/min_length": 658.4,
|
| 584 |
+
"completions/min_terminated_length": 658.4,
|
| 585 |
+
"entropy": 0.5155089020729064,
|
| 586 |
+
"epoch": 0.04072010287183883,
|
| 587 |
+
"frac_reward_zero_std": 0.0,
|
| 588 |
+
"grad_norm": 0.703125,
|
| 589 |
+
"kl": 0.0007842136197723448,
|
| 590 |
+
"learning_rate": 3.5273972602739726e-07,
|
| 591 |
+
"loss": 0.0005,
|
| 592 |
+
"num_tokens": 18937442.0,
|
| 593 |
+
"reward": 0.028260859416332097,
|
| 594 |
+
"reward_std": 0.05661347545683384,
|
| 595 |
+
"rewards/ADEnReward/mean": 0.028260859136935323,
|
| 596 |
+
"rewards/ADEnReward/std": 0.07326696976087987,
|
| 597 |
+
"step": 190,
|
| 598 |
+
"step_time": 30.974527826905252
|
| 599 |
+
},
|
| 600 |
+
{
|
| 601 |
+
"clip_ratio/high_max": 0.0,
|
| 602 |
+
"clip_ratio/high_mean": 0.0,
|
| 603 |
+
"clip_ratio/low_mean": 0.0,
|
| 604 |
+
"clip_ratio/low_min": 0.0,
|
| 605 |
+
"clip_ratio/region_mean": 0.0,
|
| 606 |
+
"completions/clipped_ratio": 0.0,
|
| 607 |
+
"completions/max_length": 726.7,
|
| 608 |
+
"completions/max_terminated_length": 726.7,
|
| 609 |
+
"completions/mean_length": 689.070849609375,
|
| 610 |
+
"completions/mean_terminated_length": 689.070849609375,
|
| 611 |
+
"completions/min_length": 656.3,
|
| 612 |
+
"completions/min_terminated_length": 656.3,
|
| 613 |
+
"entropy": 0.504573518037796,
|
| 614 |
+
"epoch": 0.042863266180882986,
|
| 615 |
+
"frac_reward_zero_std": 0.0,
|
| 616 |
+
"grad_norm": 0.6875,
|
| 617 |
+
"kl": 0.0006948979164008052,
|
| 618 |
+
"learning_rate": 3.1849315068493147e-07,
|
| 619 |
+
"loss": -0.0002,
|
| 620 |
+
"num_tokens": 19934084.0,
|
| 621 |
+
"reward": 0.03462931150570512,
|
| 622 |
+
"reward_std": 0.06458919309079647,
|
| 623 |
+
"rewards/ADEnReward/mean": 0.034629312343895435,
|
| 624 |
+
"rewards/ADEnReward/std": 0.0866784118115902,
|
| 625 |
+
"step": 200,
|
| 626 |
+
"step_time": 30.64783742129803
|
| 627 |
+
}
|
| 628 |
+
],
|
| 629 |
+
"logging_steps": 10,
|
| 630 |
+
"max_steps": 292,
|
| 631 |
+
"num_input_tokens_seen": 19934084,
|
| 632 |
+
"num_train_epochs": 1,
|
| 633 |
+
"save_steps": 100,
|
| 634 |
+
"stateful_callbacks": {
|
| 635 |
+
"TrainerControl": {
|
| 636 |
+
"args": {
|
| 637 |
+
"should_epoch_stop": false,
|
| 638 |
+
"should_evaluate": false,
|
| 639 |
+
"should_log": false,
|
| 640 |
+
"should_save": true,
|
| 641 |
+
"should_training_stop": false
|
| 642 |
+
},
|
| 643 |
+
"attributes": {}
|
| 644 |
+
}
|
| 645 |
+
},
|
| 646 |
+
"total_flos": 0.0,
|
| 647 |
+
"train_batch_size": 12,
|
| 648 |
+
"trial_name": null,
|
| 649 |
+
"trial_params": null
|
| 650 |
+
}
|
checkpoint-200/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fbde34589936c15afd6048c963f9277d5d47635a7f4b4ddffd47df13e59cdd3c
|
| 3 |
+
size 7569
|
checkpoint-200/vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoint-292/added_tokens.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"</think>": 151668,
|
| 3 |
+
"</tool_call>": 151658,
|
| 4 |
+
"</tool_response>": 151666,
|
| 5 |
+
"<think>": 151667,
|
| 6 |
+
"<tool_call>": 151657,
|
| 7 |
+
"<tool_response>": 151665,
|
| 8 |
+
"<|box_end|>": 151649,
|
| 9 |
+
"<|box_start|>": 151648,
|
| 10 |
+
"<|endoftext|>": 151643,
|
| 11 |
+
"<|file_sep|>": 151664,
|
| 12 |
+
"<|fim_middle|>": 151660,
|
| 13 |
+
"<|fim_pad|>": 151662,
|
| 14 |
+
"<|fim_prefix|>": 151659,
|
| 15 |
+
"<|fim_suffix|>": 151661,
|
| 16 |
+
"<|im_end|>": 151645,
|
| 17 |
+
"<|im_start|>": 151644,
|
| 18 |
+
"<|image_pad|>": 151655,
|
| 19 |
+
"<|object_ref_end|>": 151647,
|
| 20 |
+
"<|object_ref_start|>": 151646,
|
| 21 |
+
"<|quad_end|>": 151651,
|
| 22 |
+
"<|quad_start|>": 151650,
|
| 23 |
+
"<|repo_name|>": 151663,
|
| 24 |
+
"<|video_pad|>": 151656,
|
| 25 |
+
"<|vision_end|>": 151653,
|
| 26 |
+
"<|vision_pad|>": 151654,
|
| 27 |
+
"<|vision_start|>": 151652
|
| 28 |
+
}
|
checkpoint-292/chat_template.jinja
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{%- set image_count = namespace(value=0) %}
|
| 2 |
+
{%- set video_count = namespace(value=0) %}
|
| 3 |
+
{%- macro render_content(content, do_vision_count) %}
|
| 4 |
+
{%- if content is string %}
|
| 5 |
+
{{- content }}
|
| 6 |
+
{%- else %}
|
| 7 |
+
{%- for item in content %}
|
| 8 |
+
{%- if 'image' in item or 'image_url' in item or item.type == 'image' %}
|
| 9 |
+
{%- if do_vision_count %}
|
| 10 |
+
{%- set image_count.value = image_count.value + 1 %}
|
| 11 |
+
{%- endif %}
|
| 12 |
+
{%- if add_vision_id %}Picture {{ image_count.value }}: {% endif -%}
|
| 13 |
+
<|vision_start|><|image_pad|><|vision_end|>
|
| 14 |
+
{%- elif 'video' in item or item.type == 'video' %}
|
| 15 |
+
{%- if do_vision_count %}
|
| 16 |
+
{%- set video_count.value = video_count.value + 1 %}
|
| 17 |
+
{%- endif %}
|
| 18 |
+
{%- if add_vision_id %}Video {{ video_count.value }}: {% endif -%}
|
| 19 |
+
<|vision_start|><|video_pad|><|vision_end|>
|
| 20 |
+
{%- elif 'text' in item %}
|
| 21 |
+
{{- item.text }}
|
| 22 |
+
{%- endif %}
|
| 23 |
+
{%- endfor %}
|
| 24 |
+
{%- endif %}
|
| 25 |
+
{%- endmacro %}
|
| 26 |
+
{%- if tools %}
|
| 27 |
+
{{- '<|im_start|>system\n' }}
|
| 28 |
+
{%- if messages[0].role == 'system' %}
|
| 29 |
+
{{- render_content(messages[0].content, false) + '\n\n' }}
|
| 30 |
+
{%- endif %}
|
| 31 |
+
{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
|
| 32 |
+
{%- for tool in tools %}
|
| 33 |
+
{{- "\n" }}
|
| 34 |
+
{{- tool | tojson }}
|
| 35 |
+
{%- endfor %}
|
| 36 |
+
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
|
| 37 |
+
{%- else %}
|
| 38 |
+
{%- if messages[0].role == 'system' %}
|
| 39 |
+
{{- '<|im_start|>system\n' + render_content(messages[0].content, false) + '<|im_end|>\n' }}
|
| 40 |
+
{%- endif %}
|
| 41 |
+
{%- endif %}
|
| 42 |
+
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
|
| 43 |
+
{%- for message in messages[::-1] %}
|
| 44 |
+
{%- set index = (messages|length - 1) - loop.index0 %}
|
| 45 |
+
{%- if ns.multi_step_tool and message.role == "user" %}
|
| 46 |
+
{%- set content = render_content(message.content, false) %}
|
| 47 |
+
{%- if not(content.startswith('<tool_response>') and content.endswith('</tool_response>')) %}
|
| 48 |
+
{%- set ns.multi_step_tool = false %}
|
| 49 |
+
{%- set ns.last_query_index = index %}
|
| 50 |
+
{%- endif %}
|
| 51 |
+
{%- endif %}
|
| 52 |
+
{%- endfor %}
|
| 53 |
+
{%- for message in messages %}
|
| 54 |
+
{%- set content = render_content(message.content, True) %}
|
| 55 |
+
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
|
| 56 |
+
{{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
|
| 57 |
+
{%- elif message.role == "assistant" %}
|
| 58 |
+
{%- set reasoning_content = '' %}
|
| 59 |
+
{%- if message.reasoning_content is string %}
|
| 60 |
+
{%- set reasoning_content = message.reasoning_content %}
|
| 61 |
+
{%- else %}
|
| 62 |
+
{%- if '</think>' in content %}
|
| 63 |
+
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
|
| 64 |
+
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
|
| 65 |
+
{%- endif %}
|
| 66 |
+
{%- endif %}
|
| 67 |
+
{%- if loop.index0 > ns.last_query_index %}
|
| 68 |
+
{%- if loop.last or (not loop.last and reasoning_content) %}
|
| 69 |
+
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
|
| 70 |
+
{%- else %}
|
| 71 |
+
{{- '<|im_start|>' + message.role + '\n' + content }}
|
| 72 |
+
{%- endif %}
|
| 73 |
+
{%- else %}
|
| 74 |
+
{{- '<|im_start|>' + message.role + '\n' + content }}
|
| 75 |
+
{%- endif %}
|
| 76 |
+
{%- if message.tool_calls %}
|
| 77 |
+
{%- for tool_call in message.tool_calls %}
|
| 78 |
+
{%- if (loop.first and content) or (not loop.first) %}
|
| 79 |
+
{{- '\n' }}
|
| 80 |
+
{%- endif %}
|
| 81 |
+
{%- if tool_call.function %}
|
| 82 |
+
{%- set tool_call = tool_call.function %}
|
| 83 |
+
{%- endif %}
|
| 84 |
+
{{- '<tool_call>\n{"name": "' }}
|
| 85 |
+
{{- tool_call.name }}
|
| 86 |
+
{{- '", "arguments": ' }}
|
| 87 |
+
{%- if tool_call.arguments is string %}
|
| 88 |
+
{{- tool_call.arguments }}
|
| 89 |
+
{%- else %}
|
| 90 |
+
{{- tool_call.arguments | tojson }}
|
| 91 |
+
{%- endif %}
|
| 92 |
+
{{- '}\n</tool_call>' }}
|
| 93 |
+
{%- endfor %}
|
| 94 |
+
{%- endif %}
|
| 95 |
+
{{- '<|im_end|>\n' }}
|
| 96 |
+
{%- elif message.role == "tool" %}
|
| 97 |
+
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
|
| 98 |
+
{{- '<|im_start|>user' }}
|
| 99 |
+
{%- endif %}
|
| 100 |
+
{{- '\n<tool_response>\n' }}
|
| 101 |
+
{{- content }}
|
| 102 |
+
{{- '\n</tool_response>' }}
|
| 103 |
+
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
| 104 |
+
{{- '<|im_end|>\n' }}
|
| 105 |
+
{%- endif %}
|
| 106 |
+
{%- endif %}
|
| 107 |
+
{%- endfor %}
|
| 108 |
+
{%- if add_generation_prompt %}
|
| 109 |
+
{{- '<|im_start|>assistant\n' }}
|
| 110 |
+
{%- endif %}
|
checkpoint-292/config.json
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"Qwen3VLForConditionalGeneration"
|
| 4 |
+
],
|
| 5 |
+
"dtype": "bfloat16",
|
| 6 |
+
"eos_token_id": 151645,
|
| 7 |
+
"image_token_id": 151655,
|
| 8 |
+
"model_type": "qwen3_vl",
|
| 9 |
+
"pad_token_id": 151643,
|
| 10 |
+
"text_config": {
|
| 11 |
+
"attention_bias": false,
|
| 12 |
+
"attention_dropout": 0.0,
|
| 13 |
+
"bos_token_id": 151643,
|
| 14 |
+
"dtype": "bfloat16",
|
| 15 |
+
"eos_token_id": 151645,
|
| 16 |
+
"head_dim": 128,
|
| 17 |
+
"hidden_act": "silu",
|
| 18 |
+
"hidden_size": 2560,
|
| 19 |
+
"initializer_range": 0.02,
|
| 20 |
+
"intermediate_size": 9728,
|
| 21 |
+
"max_position_embeddings": 262144,
|
| 22 |
+
"model_type": "qwen3_vl_text",
|
| 23 |
+
"num_attention_heads": 32,
|
| 24 |
+
"num_hidden_layers": 36,
|
| 25 |
+
"num_key_value_heads": 8,
|
| 26 |
+
"rms_norm_eps": 1e-06,
|
| 27 |
+
"rope_scaling": {
|
| 28 |
+
"mrope_interleaved": true,
|
| 29 |
+
"mrope_section": [
|
| 30 |
+
24,
|
| 31 |
+
20,
|
| 32 |
+
20
|
| 33 |
+
],
|
| 34 |
+
"rope_type": "default"
|
| 35 |
+
},
|
| 36 |
+
"rope_theta": 5000000,
|
| 37 |
+
"tie_word_embeddings": true,
|
| 38 |
+
"use_cache": true,
|
| 39 |
+
"vocab_size": 151936
|
| 40 |
+
},
|
| 41 |
+
"tie_word_embeddings": true,
|
| 42 |
+
"transformers_version": "4.57.6",
|
| 43 |
+
"use_cache": false,
|
| 44 |
+
"video_token_id": 151656,
|
| 45 |
+
"vision_config": {
|
| 46 |
+
"deepstack_visual_indexes": [
|
| 47 |
+
5,
|
| 48 |
+
11,
|
| 49 |
+
17
|
| 50 |
+
],
|
| 51 |
+
"depth": 24,
|
| 52 |
+
"dtype": "bfloat16",
|
| 53 |
+
"hidden_act": "gelu_pytorch_tanh",
|
| 54 |
+
"hidden_size": 1024,
|
| 55 |
+
"in_channels": 3,
|
| 56 |
+
"initializer_range": 0.02,
|
| 57 |
+
"intermediate_size": 4096,
|
| 58 |
+
"model_type": "qwen3_vl",
|
| 59 |
+
"num_heads": 16,
|
| 60 |
+
"num_position_embeddings": 2304,
|
| 61 |
+
"out_hidden_size": 2560,
|
| 62 |
+
"patch_size": 16,
|
| 63 |
+
"spatial_merge_size": 2,
|
| 64 |
+
"temporal_patch_size": 2
|
| 65 |
+
},
|
| 66 |
+
"vision_end_token_id": 151653,
|
| 67 |
+
"vision_start_token_id": 151652
|
| 68 |
+
}
|
checkpoint-292/generation_config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"do_sample": true,
|
| 3 |
+
"eos_token_id": [
|
| 4 |
+
151645,
|
| 5 |
+
151645,
|
| 6 |
+
151643
|
| 7 |
+
],
|
| 8 |
+
"pad_token_id": 151643,
|
| 9 |
+
"top_k": 20,
|
| 10 |
+
"top_p": 0.95,
|
| 11 |
+
"transformers_version": "4.57.6"
|
| 12 |
+
}
|
checkpoint-292/merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|