KRadim committed on
Commit
1d56598
·
verified ·
1 Parent(s): b83f8c8

LoRA adapter for Flax DistilBERT (NNX implementation)

Browse files
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ lora_checkpoint/ocdbt.process_0/d/69fc463edd30152315ba60775f4b3c5c filter=lfs diff=lfs merge=lfs -text
37
+ lora_checkpoint/ocdbt.process_0/d/724e04203f2095d9727dbdd266bda452 filter=lfs diff=lfs merge=lfs -text
38
+ lora_checkpoint/ocdbt.process_0/d/82e6cab7299b3ce1fde729a27b339e89 filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ ---
4
+ language:
5
+ - en
6
+ thumbnail: https://github.com/karanchahal/distiller/blob/master/distiller.jpg
7
+ tags:
8
+ - text-classification
9
+ license: apache-2.0
10
+ datasets:
11
+ - imdb
12
+ metrics:
13
+ - accuracy
14
+ ---
15
+
16
+ # LoRA Adapter for distilbert-base-uncased
17
+
18
+ This adapter was trained using Flax NNX for the task of sentiment classification on the IMDB dataset.
19
+
20
+ ## Configuration
21
+ - LoRA rank: 16
22
+ - LoRA alpha: 32
23
+ - Target modules: ['q_lin', 'v_lin']
24
+
25
+ ## Usage
26
+ ```python
27
+ # Loading and using the adapter
28
+ import orbax.checkpoint as ocp
29
+ import pickle
30
+
31
+ # Method 1: Orbax (recommended for larger models)
32
+ checkpointer = ocp.StandardCheckpointer()
33
+ lora_params = checkpointer.restore("lora_checkpoint")
34
+
35
+ # Method 2 (alternative to Method 1): Pickle (easier for smaller models; only unpickle files you trust)
36
+ with open("lora_params.pkl", "rb") as f:
37
+     lora_params = pickle.load(f)
38
+
39
+ # Reconstruction of LoRA layers
40
+ from flax import nnx
41
+ lora_layers = None
42
+ for layer_name, params in lora_params.items():
43
+     # Create a new LoRA layer with the correct parameters
44
+     # (implementation depends on the specific architecture)
45
+     pass
46
+ ```
adapter_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "lora_rank": 16,
3
+ "lora_alpha": 32,
4
+ "target_modules": [
5
+ "q_lin",
6
+ "v_lin"
7
+ ],
8
+ "base_model_name": "distilbert-base-uncased",
9
+ "task_type": "sequence_classification"
10
+ }
lora_checkpoint/_CHECKPOINT_METADATA ADDED
@@ -0,0 +1 @@
 
 
1
+ {"item_handlers": "orbax.checkpoint._src.handlers.standard_checkpoint_handler.StandardCheckpointHandler", "metrics": {}, "performance_metrics": {}, "init_timestamp_nsecs": 1758277493548384711, "commit_timestamp_nsecs": 1758277493896111732, "custom_metadata": {}}
lora_checkpoint/_METADATA ADDED
@@ -0,0 +1 @@
 
 
1
+ {"tree_metadata": {"('layer_0_q_lin', 'bias')": {"key_metadata": [{"key": "layer_0_q_lin", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layer_0_q_lin', 'kernel')": {"key_metadata": [{"key": "layer_0_q_lin", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layer_0_q_lin', 'lora', 'lora_a', 'value')": {"key_metadata": [{"key": "layer_0_q_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_a", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 16]}}, "('layer_0_q_lin', 'lora', 'lora_b', 'value')": {"key_metadata": [{"key": "layer_0_q_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_b", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 768]}}, "('layer_0_v_lin', 'bias')": {"key_metadata": [{"key": "layer_0_v_lin", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layer_0_v_lin', 'kernel')": {"key_metadata": [{"key": "layer_0_v_lin", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layer_0_v_lin', 'lora', 'lora_a', 'value')": {"key_metadata": [{"key": "layer_0_v_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_a", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 16]}}, "('layer_0_v_lin', 'lora', 'lora_b', 'value')": {"key_metadata": [{"key": "layer_0_v_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_b", 
"key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 768]}}, "('layer_1_q_lin', 'bias')": {"key_metadata": [{"key": "layer_1_q_lin", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layer_1_q_lin', 'kernel')": {"key_metadata": [{"key": "layer_1_q_lin", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layer_1_q_lin', 'lora', 'lora_a', 'value')": {"key_metadata": [{"key": "layer_1_q_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_a", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 16]}}, "('layer_1_q_lin', 'lora', 'lora_b', 'value')": {"key_metadata": [{"key": "layer_1_q_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_b", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 768]}}, "('layer_1_v_lin', 'bias')": {"key_metadata": [{"key": "layer_1_v_lin", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layer_1_v_lin', 'kernel')": {"key_metadata": [{"key": "layer_1_v_lin", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layer_1_v_lin', 'lora', 'lora_a', 'value')": {"key_metadata": [{"key": "layer_1_v_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_a", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 16]}}, "('layer_1_v_lin', 'lora', 
'lora_b', 'value')": {"key_metadata": [{"key": "layer_1_v_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_b", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 768]}}, "('layer_2_q_lin', 'bias')": {"key_metadata": [{"key": "layer_2_q_lin", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layer_2_q_lin', 'kernel')": {"key_metadata": [{"key": "layer_2_q_lin", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layer_2_q_lin', 'lora', 'lora_a', 'value')": {"key_metadata": [{"key": "layer_2_q_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_a", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 16]}}, "('layer_2_q_lin', 'lora', 'lora_b', 'value')": {"key_metadata": [{"key": "layer_2_q_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_b", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 768]}}, "('layer_2_v_lin', 'bias')": {"key_metadata": [{"key": "layer_2_v_lin", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layer_2_v_lin', 'kernel')": {"key_metadata": [{"key": "layer_2_v_lin", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layer_2_v_lin', 'lora', 'lora_a', 'value')": {"key_metadata": [{"key": "layer_2_v_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_a", "key_type": 2}, {"key": "value", "key_type": 2}], 
"value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 16]}}, "('layer_2_v_lin', 'lora', 'lora_b', 'value')": {"key_metadata": [{"key": "layer_2_v_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_b", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 768]}}, "('layer_3_q_lin', 'bias')": {"key_metadata": [{"key": "layer_3_q_lin", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layer_3_q_lin', 'kernel')": {"key_metadata": [{"key": "layer_3_q_lin", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layer_3_q_lin', 'lora', 'lora_a', 'value')": {"key_metadata": [{"key": "layer_3_q_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_a", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 16]}}, "('layer_3_q_lin', 'lora', 'lora_b', 'value')": {"key_metadata": [{"key": "layer_3_q_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_b", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 768]}}, "('layer_3_v_lin', 'bias')": {"key_metadata": [{"key": "layer_3_v_lin", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layer_3_v_lin', 'kernel')": {"key_metadata": [{"key": "layer_3_v_lin", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layer_3_v_lin', 'lora', 'lora_a', 'value')": {"key_metadata": [{"key": 
"layer_3_v_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_a", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 16]}}, "('layer_3_v_lin', 'lora', 'lora_b', 'value')": {"key_metadata": [{"key": "layer_3_v_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_b", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 768]}}, "('layer_4_q_lin', 'bias')": {"key_metadata": [{"key": "layer_4_q_lin", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layer_4_q_lin', 'kernel')": {"key_metadata": [{"key": "layer_4_q_lin", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layer_4_q_lin', 'lora', 'lora_a', 'value')": {"key_metadata": [{"key": "layer_4_q_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_a", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 16]}}, "('layer_4_q_lin', 'lora', 'lora_b', 'value')": {"key_metadata": [{"key": "layer_4_q_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_b", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 768]}}, "('layer_4_v_lin', 'bias')": {"key_metadata": [{"key": "layer_4_v_lin", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layer_4_v_lin', 'kernel')": {"key_metadata": [{"key": "layer_4_v_lin", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", 
"skip_deserialize": false, "write_shape": [768, 768]}}, "('layer_4_v_lin', 'lora', 'lora_a', 'value')": {"key_metadata": [{"key": "layer_4_v_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_a", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 16]}}, "('layer_4_v_lin', 'lora', 'lora_b', 'value')": {"key_metadata": [{"key": "layer_4_v_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_b", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 768]}}, "('layer_5_q_lin', 'bias')": {"key_metadata": [{"key": "layer_5_q_lin", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layer_5_q_lin', 'kernel')": {"key_metadata": [{"key": "layer_5_q_lin", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layer_5_q_lin', 'lora', 'lora_a', 'value')": {"key_metadata": [{"key": "layer_5_q_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_a", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 16]}}, "('layer_5_q_lin', 'lora', 'lora_b', 'value')": {"key_metadata": [{"key": "layer_5_q_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_b", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 768]}}, "('layer_5_v_lin', 'bias')": {"key_metadata": [{"key": "layer_5_v_lin", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layer_5_v_lin', 'kernel')": {"key_metadata": 
[{"key": "layer_5_v_lin", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layer_5_v_lin', 'lora', 'lora_a', 'value')": {"key_metadata": [{"key": "layer_5_v_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_a", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 16]}}, "('layer_5_v_lin', 'lora', 'lora_b', 'value')": {"key_metadata": [{"key": "layer_5_v_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_b", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 768]}}}, "use_zarr3": false, "store_array_data_equal_to_fill_value": true, "custom_metadata": null}
lora_checkpoint/_sharding ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bGF5ZXJfM192X2xpbi5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfM192X2xpbi5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfM192X2xpbi5sb3JhLmxvcmFfYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfM192X2xpbi5sb3JhLmxvcmFfYi52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfM19xX2xpbi5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfM19xX2xpbi5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfM19xX2xpbi5sb3JhLmxvcmFfYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfM19xX2xpbi5sb3JhLmxvcmFfYi52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMF92X2xpbi5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMF92X2xpbi5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMF92X2xpbi5sb3JhLmxvcmFfYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMF92X2xpbi5sb3JhLmxvcmFfYi52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMF9xX2xpbi5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMF9xX2xpbi5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMF9xX2xpbi5sb3JhLmxvcmFfYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMF9xX2xpbi5sb3JhLmxvcmFfYi52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMV92X2xpbi5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMV92X2xpbi5rZXJuZWw=":"{\"sharding_type\": 
\"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMV92X2xpbi5sb3JhLmxvcmFfYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMV92X2xpbi5sb3JhLmxvcmFfYi52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMV9xX2xpbi5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMV9xX2xpbi5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMV9xX2xpbi5sb3JhLmxvcmFfYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMV9xX2xpbi5sb3JhLmxvcmFfYi52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMl92X2xpbi5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMl92X2xpbi5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMl92X2xpbi5sb3JhLmxvcmFfYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMl92X2xpbi5sb3JhLmxvcmFfYi52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMl9xX2xpbi5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMl9xX2xpbi5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMl9xX2xpbi5sb3JhLmxvcmFfYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMl9xX2xpbi5sb3JhLmxvcmFfYi52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfNF92X2xpbi5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfNF92X2xpbi5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfNF92X2xpbi5sb3JhLmxvcmFfYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": 
\"cuda:0\"}","bGF5ZXJfNF92X2xpbi5sb3JhLmxvcmFfYi52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfNF9xX2xpbi5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfNF9xX2xpbi5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfNF9xX2xpbi5sb3JhLmxvcmFfYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfNF9xX2xpbi5sb3JhLmxvcmFfYi52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfNV92X2xpbi5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfNV92X2xpbi5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfNV92X2xpbi5sb3JhLmxvcmFfYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfNV92X2xpbi5sb3JhLmxvcmFfYi52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfNV9xX2xpbi5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfNV9xX2xpbi5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfNV9xX2xpbi5sb3JhLmxvcmFfYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfNV9xX2xpbi5sb3JhLmxvcmFfYi52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}"}
lora_checkpoint/array_metadatas/process_0 ADDED
@@ -0,0 +1 @@
 
 
1
+ {"array_metadatas": [{"array_metadata": {"param_name": "layer_0_q_lin.bias", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_0_q_lin.kernel", "write_shape": [768, 768], "chunk_shape": [768, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_0_q_lin.lora.lora_a.value", "write_shape": [768, 16], "chunk_shape": [768, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_0_q_lin.lora.lora_b.value", "write_shape": [16, 768], "chunk_shape": [16, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_0_v_lin.bias", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_0_v_lin.kernel", "write_shape": [768, 768], "chunk_shape": [768, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_0_v_lin.lora.lora_a.value", "write_shape": [768, 16], "chunk_shape": [768, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_0_v_lin.lora.lora_b.value", "write_shape": [16, 768], "chunk_shape": [16, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_1_q_lin.bias", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_1_q_lin.kernel", "write_shape": [768, 768], "chunk_shape": [768, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_1_q_lin.lora.lora_a.value", "write_shape": [768, 16], "chunk_shape": [768, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_1_q_lin.lora.lora_b.value", "write_shape": [16, 768], "chunk_shape": [16, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_1_v_lin.bias", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_1_v_lin.kernel", "write_shape": [768, 768], "chunk_shape": [768, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": 
"layer_1_v_lin.lora.lora_a.value", "write_shape": [768, 16], "chunk_shape": [768, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_1_v_lin.lora.lora_b.value", "write_shape": [16, 768], "chunk_shape": [16, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_2_q_lin.bias", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_2_q_lin.kernel", "write_shape": [768, 768], "chunk_shape": [768, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_2_q_lin.lora.lora_a.value", "write_shape": [768, 16], "chunk_shape": [768, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_2_q_lin.lora.lora_b.value", "write_shape": [16, 768], "chunk_shape": [16, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_2_v_lin.bias", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_2_v_lin.kernel", "write_shape": [768, 768], "chunk_shape": [768, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_2_v_lin.lora.lora_a.value", "write_shape": [768, 16], "chunk_shape": [768, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_2_v_lin.lora.lora_b.value", "write_shape": [16, 768], "chunk_shape": [16, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_3_q_lin.bias", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_3_q_lin.kernel", "write_shape": [768, 768], "chunk_shape": [768, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_3_q_lin.lora.lora_a.value", "write_shape": [768, 16], "chunk_shape": [768, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_3_q_lin.lora.lora_b.value", "write_shape": [16, 768], "chunk_shape": [16, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_3_v_lin.bias", "write_shape": [768], "chunk_shape": 
[768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_3_v_lin.kernel", "write_shape": [768, 768], "chunk_shape": [768, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_3_v_lin.lora.lora_a.value", "write_shape": [768, 16], "chunk_shape": [768, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_3_v_lin.lora.lora_b.value", "write_shape": [16, 768], "chunk_shape": [16, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_4_q_lin.bias", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_4_q_lin.kernel", "write_shape": [768, 768], "chunk_shape": [768, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_4_q_lin.lora.lora_a.value", "write_shape": [768, 16], "chunk_shape": [768, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_4_q_lin.lora.lora_b.value", "write_shape": [16, 768], "chunk_shape": [16, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_4_v_lin.bias", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_4_v_lin.kernel", "write_shape": [768, 768], "chunk_shape": [768, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_4_v_lin.lora.lora_a.value", "write_shape": [768, 16], "chunk_shape": [768, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_4_v_lin.lora.lora_b.value", "write_shape": [16, 768], "chunk_shape": [16, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_5_q_lin.bias", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_5_q_lin.kernel", "write_shape": [768, 768], "chunk_shape": [768, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_5_q_lin.lora.lora_a.value", "write_shape": [768, 16], "chunk_shape": [768, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": 
"layer_5_q_lin.lora.lora_b.value", "write_shape": [16, 768], "chunk_shape": [16, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_5_v_lin.bias", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_5_v_lin.kernel", "write_shape": [768, 768], "chunk_shape": [768, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_5_v_lin.lora.lora_a.value", "write_shape": [768, 16], "chunk_shape": [768, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_5_v_lin.lora.lora_b.value", "write_shape": [16, 768], "chunk_shape": [16, 768], "ext_metadata": null}}]}
lora_checkpoint/d/51f50ee9e30bcc68842ed721aac08cdd ADDED
Binary file (699 Bytes). View file
 
lora_checkpoint/manifest.ocdbt ADDED
Binary file (116 Bytes). View file
 
lora_checkpoint/ocdbt.process_0/d/31274f15bd89ca15decd5e29c83a4889 ADDED
Binary file (360 Bytes). View file
 
lora_checkpoint/ocdbt.process_0/d/4f22d159ac2161a58dcfe1d70cb0a43e ADDED
Binary file (188 Bytes). View file
 
lora_checkpoint/ocdbt.process_0/d/69fc463edd30152315ba60775f4b3c5c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14c7934db4095e2f15dae6d88084caee8eebec968618dacfc4756cd3005e0197
3
+ size 2183663
lora_checkpoint/ocdbt.process_0/d/724e04203f2095d9727dbdd266bda452 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54d18953b2d87cb7cdcc08f3ba2c6a6eac2679074614a0e9953fea6ffe3f8feb
3
+ size 20097548
lora_checkpoint/ocdbt.process_0/d/82e6cab7299b3ce1fde729a27b339e89 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cefa375e1f93751616fe0eced8fba54aabbcab4a0f636c4a990866ae785f15e
3
+ size 4456644
lora_checkpoint/ocdbt.process_0/d/ccc7945413dbb108daf751b8bec4743a ADDED
Binary file (321 Bytes). View file
 
lora_checkpoint/ocdbt.process_0/manifest.ocdbt ADDED
Binary file (344 Bytes). View file
 
lora_params.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03dbee2db9399a94280e61a867ff89b0f8b359ddb291406f4dc4ab7ecd1a0244
3
+ size 29531978