LoRA adapter for Flax DistilBERT (NNX implementation)
Browse files- .gitattributes +3 -0
- README.md +46 -0
- adapter_config.json +10 -0
- lora_checkpoint/_CHECKPOINT_METADATA +1 -0
- lora_checkpoint/_METADATA +1 -0
- lora_checkpoint/_sharding +1 -0
- lora_checkpoint/array_metadatas/process_0 +1 -0
- lora_checkpoint/d/51f50ee9e30bcc68842ed721aac08cdd +0 -0
- lora_checkpoint/manifest.ocdbt +0 -0
- lora_checkpoint/ocdbt.process_0/d/31274f15bd89ca15decd5e29c83a4889 +0 -0
- lora_checkpoint/ocdbt.process_0/d/4f22d159ac2161a58dcfe1d70cb0a43e +0 -0
- lora_checkpoint/ocdbt.process_0/d/69fc463edd30152315ba60775f4b3c5c +3 -0
- lora_checkpoint/ocdbt.process_0/d/724e04203f2095d9727dbdd266bda452 +3 -0
- lora_checkpoint/ocdbt.process_0/d/82e6cab7299b3ce1fde729a27b339e89 +3 -0
- lora_checkpoint/ocdbt.process_0/d/ccc7945413dbb108daf751b8bec4743a +0 -0
- lora_checkpoint/ocdbt.process_0/manifest.ocdbt +0 -0
- lora_params.pkl +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
lora_checkpoint/ocdbt.process_0/d/69fc463edd30152315ba60775f4b3c5c filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
lora_checkpoint/ocdbt.process_0/d/724e04203f2095d9727dbdd266bda452 filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
lora_checkpoint/ocdbt.process_0/d/82e6cab7299b3ce1fde729a27b339e89 filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
|
| 3 |
+
---
|
| 4 |
+
language:
|
| 5 |
+
- en
|
| 6 |
+
thumbnail: https://github.com/karanchahal/distiller/blob/master/distiller.jpg
|
| 7 |
+
tags:
|
| 8 |
+
- text-classification
|
| 9 |
+
license: apache-2.0
|
| 10 |
+
datasets:
|
| 11 |
+
- imdb
|
| 12 |
+
metrics:
|
| 13 |
+
- accuracy
|
| 14 |
+
---
|
| 15 |
+
|
| 16 |
+
# LoRA Adapter for distilbert-base-uncased
|
| 17 |
+
|
| 18 |
+
This adapter was trained using Flax NNX for the task of sentiment classification on the IMDB dataset.
|
| 19 |
+
|
| 20 |
+
## Configuration
|
| 21 |
+
- LoRA rank: 16
|
| 22 |
+
- LoRA alpha: 32
|
| 23 |
+
- Target modules: ['q_lin', 'v_lin']
|
| 24 |
+
|
| 25 |
+
## Use
|
| 26 |
+
```python
|
| 27 |
+
# Loading and using the adapter
|
| 28 |
+
import orbax.checkpoint as ocp
|
| 29 |
+
import pickle
|
| 30 |
+
|
| 31 |
+
# Method 1: Orbax (recommended for larger models)
|
| 32 |
+
checkpointer = ocp.StandardCheckpointer()
|
| 33 |
+
lora_params = checkpointer.restore("lora_checkpoint")
|
| 34 |
+
|
| 35 |
+
# Method 2: Pickle (easier for smaller models; only unpickle files from a source you trust)
|
| 36 |
+
with open("lora_params.pkl", "rb") as f:
|
| 37 |
+
lora_params = pickle.load(f)
|
| 38 |
+
|
| 39 |
+
# Reconstruction of LoRA layers
|
| 40 |
+
from flax import nnx
|
| 41 |
+
lora_layers = None
|
| 42 |
+
for layer_name, params in lora_params.items():
|
| 43 |
+
# Create a new LoRA layer with the correct parameters
|
| 44 |
+
# (implementation depends on the specific architecture)
|
| 45 |
+
pass
|
| 46 |
+
```
|
adapter_config.json
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"lora_rank": 16,
|
| 3 |
+
"lora_alpha": 32,
|
| 4 |
+
"target_modules": [
|
| 5 |
+
"q_lin",
|
| 6 |
+
"v_lin"
|
| 7 |
+
],
|
| 8 |
+
"base_model_name": "distilbert-base-uncased",
|
| 9 |
+
"task_type": "sequence_classification"
|
| 10 |
+
}
|
lora_checkpoint/_CHECKPOINT_METADATA
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"item_handlers": "orbax.checkpoint._src.handlers.standard_checkpoint_handler.StandardCheckpointHandler", "metrics": {}, "performance_metrics": {}, "init_timestamp_nsecs": 1758277493548384711, "commit_timestamp_nsecs": 1758277493896111732, "custom_metadata": {}}
|
lora_checkpoint/_METADATA
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"tree_metadata": {"('layer_0_q_lin', 'bias')": {"key_metadata": [{"key": "layer_0_q_lin", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layer_0_q_lin', 'kernel')": {"key_metadata": [{"key": "layer_0_q_lin", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layer_0_q_lin', 'lora', 'lora_a', 'value')": {"key_metadata": [{"key": "layer_0_q_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_a", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 16]}}, "('layer_0_q_lin', 'lora', 'lora_b', 'value')": {"key_metadata": [{"key": "layer_0_q_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_b", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 768]}}, "('layer_0_v_lin', 'bias')": {"key_metadata": [{"key": "layer_0_v_lin", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layer_0_v_lin', 'kernel')": {"key_metadata": [{"key": "layer_0_v_lin", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layer_0_v_lin', 'lora', 'lora_a', 'value')": {"key_metadata": [{"key": "layer_0_v_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_a", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 16]}}, "('layer_0_v_lin', 'lora', 'lora_b', 'value')": {"key_metadata": [{"key": "layer_0_v_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_b", 
"key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 768]}}, "('layer_1_q_lin', 'bias')": {"key_metadata": [{"key": "layer_1_q_lin", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layer_1_q_lin', 'kernel')": {"key_metadata": [{"key": "layer_1_q_lin", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layer_1_q_lin', 'lora', 'lora_a', 'value')": {"key_metadata": [{"key": "layer_1_q_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_a", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 16]}}, "('layer_1_q_lin', 'lora', 'lora_b', 'value')": {"key_metadata": [{"key": "layer_1_q_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_b", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 768]}}, "('layer_1_v_lin', 'bias')": {"key_metadata": [{"key": "layer_1_v_lin", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layer_1_v_lin', 'kernel')": {"key_metadata": [{"key": "layer_1_v_lin", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layer_1_v_lin', 'lora', 'lora_a', 'value')": {"key_metadata": [{"key": "layer_1_v_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_a", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 16]}}, "('layer_1_v_lin', 'lora', 
'lora_b', 'value')": {"key_metadata": [{"key": "layer_1_v_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_b", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 768]}}, "('layer_2_q_lin', 'bias')": {"key_metadata": [{"key": "layer_2_q_lin", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layer_2_q_lin', 'kernel')": {"key_metadata": [{"key": "layer_2_q_lin", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layer_2_q_lin', 'lora', 'lora_a', 'value')": {"key_metadata": [{"key": "layer_2_q_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_a", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 16]}}, "('layer_2_q_lin', 'lora', 'lora_b', 'value')": {"key_metadata": [{"key": "layer_2_q_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_b", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 768]}}, "('layer_2_v_lin', 'bias')": {"key_metadata": [{"key": "layer_2_v_lin", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layer_2_v_lin', 'kernel')": {"key_metadata": [{"key": "layer_2_v_lin", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layer_2_v_lin', 'lora', 'lora_a', 'value')": {"key_metadata": [{"key": "layer_2_v_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_a", "key_type": 2}, {"key": "value", "key_type": 2}], 
"value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 16]}}, "('layer_2_v_lin', 'lora', 'lora_b', 'value')": {"key_metadata": [{"key": "layer_2_v_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_b", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 768]}}, "('layer_3_q_lin', 'bias')": {"key_metadata": [{"key": "layer_3_q_lin", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layer_3_q_lin', 'kernel')": {"key_metadata": [{"key": "layer_3_q_lin", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layer_3_q_lin', 'lora', 'lora_a', 'value')": {"key_metadata": [{"key": "layer_3_q_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_a", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 16]}}, "('layer_3_q_lin', 'lora', 'lora_b', 'value')": {"key_metadata": [{"key": "layer_3_q_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_b", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 768]}}, "('layer_3_v_lin', 'bias')": {"key_metadata": [{"key": "layer_3_v_lin", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layer_3_v_lin', 'kernel')": {"key_metadata": [{"key": "layer_3_v_lin", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layer_3_v_lin', 'lora', 'lora_a', 'value')": {"key_metadata": [{"key": 
"layer_3_v_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_a", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 16]}}, "('layer_3_v_lin', 'lora', 'lora_b', 'value')": {"key_metadata": [{"key": "layer_3_v_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_b", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 768]}}, "('layer_4_q_lin', 'bias')": {"key_metadata": [{"key": "layer_4_q_lin", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layer_4_q_lin', 'kernel')": {"key_metadata": [{"key": "layer_4_q_lin", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layer_4_q_lin', 'lora', 'lora_a', 'value')": {"key_metadata": [{"key": "layer_4_q_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_a", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 16]}}, "('layer_4_q_lin', 'lora', 'lora_b', 'value')": {"key_metadata": [{"key": "layer_4_q_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_b", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 768]}}, "('layer_4_v_lin', 'bias')": {"key_metadata": [{"key": "layer_4_v_lin", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layer_4_v_lin', 'kernel')": {"key_metadata": [{"key": "layer_4_v_lin", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", 
"skip_deserialize": false, "write_shape": [768, 768]}}, "('layer_4_v_lin', 'lora', 'lora_a', 'value')": {"key_metadata": [{"key": "layer_4_v_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_a", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 16]}}, "('layer_4_v_lin', 'lora', 'lora_b', 'value')": {"key_metadata": [{"key": "layer_4_v_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_b", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 768]}}, "('layer_5_q_lin', 'bias')": {"key_metadata": [{"key": "layer_5_q_lin", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layer_5_q_lin', 'kernel')": {"key_metadata": [{"key": "layer_5_q_lin", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layer_5_q_lin', 'lora', 'lora_a', 'value')": {"key_metadata": [{"key": "layer_5_q_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_a", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 16]}}, "('layer_5_q_lin', 'lora', 'lora_b', 'value')": {"key_metadata": [{"key": "layer_5_q_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_b", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 768]}}, "('layer_5_v_lin', 'bias')": {"key_metadata": [{"key": "layer_5_v_lin", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layer_5_v_lin', 'kernel')": {"key_metadata": 
[{"key": "layer_5_v_lin", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layer_5_v_lin', 'lora', 'lora_a', 'value')": {"key_metadata": [{"key": "layer_5_v_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_a", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 16]}}, "('layer_5_v_lin', 'lora', 'lora_b', 'value')": {"key_metadata": [{"key": "layer_5_v_lin", "key_type": 2}, {"key": "lora", "key_type": 2}, {"key": "lora_b", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 768]}}}, "use_zarr3": false, "store_array_data_equal_to_fill_value": true, "custom_metadata": null}
|
lora_checkpoint/_sharding
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"bGF5ZXJfM192X2xpbi5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfM192X2xpbi5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfM192X2xpbi5sb3JhLmxvcmFfYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfM192X2xpbi5sb3JhLmxvcmFfYi52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfM19xX2xpbi5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfM19xX2xpbi5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfM19xX2xpbi5sb3JhLmxvcmFfYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfM19xX2xpbi5sb3JhLmxvcmFfYi52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMF92X2xpbi5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMF92X2xpbi5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMF92X2xpbi5sb3JhLmxvcmFfYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMF92X2xpbi5sb3JhLmxvcmFfYi52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMF9xX2xpbi5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMF9xX2xpbi5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMF9xX2xpbi5sb3JhLmxvcmFfYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMF9xX2xpbi5sb3JhLmxvcmFfYi52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMV92X2xpbi5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMV92X2xpbi5rZXJuZWw=":"{\"sharding_type\": 
\"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMV92X2xpbi5sb3JhLmxvcmFfYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMV92X2xpbi5sb3JhLmxvcmFfYi52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMV9xX2xpbi5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMV9xX2xpbi5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMV9xX2xpbi5sb3JhLmxvcmFfYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMV9xX2xpbi5sb3JhLmxvcmFfYi52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMl92X2xpbi5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMl92X2xpbi5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMl92X2xpbi5sb3JhLmxvcmFfYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMl92X2xpbi5sb3JhLmxvcmFfYi52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMl9xX2xpbi5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMl9xX2xpbi5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMl9xX2xpbi5sb3JhLmxvcmFfYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfMl9xX2xpbi5sb3JhLmxvcmFfYi52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfNF92X2xpbi5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfNF92X2xpbi5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfNF92X2xpbi5sb3JhLmxvcmFfYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": 
\"cuda:0\"}","bGF5ZXJfNF92X2xpbi5sb3JhLmxvcmFfYi52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfNF9xX2xpbi5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfNF9xX2xpbi5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfNF9xX2xpbi5sb3JhLmxvcmFfYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfNF9xX2xpbi5sb3JhLmxvcmFfYi52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfNV92X2xpbi5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfNV92X2xpbi5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfNV92X2xpbi5sb3JhLmxvcmFfYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfNV92X2xpbi5sb3JhLmxvcmFfYi52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfNV9xX2xpbi5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfNV9xX2xpbi5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfNV9xX2xpbi5sb3JhLmxvcmFfYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bGF5ZXJfNV9xX2xpbi5sb3JhLmxvcmFfYi52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}"}
|
lora_checkpoint/array_metadatas/process_0
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"array_metadatas": [{"array_metadata": {"param_name": "layer_0_q_lin.bias", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_0_q_lin.kernel", "write_shape": [768, 768], "chunk_shape": [768, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_0_q_lin.lora.lora_a.value", "write_shape": [768, 16], "chunk_shape": [768, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_0_q_lin.lora.lora_b.value", "write_shape": [16, 768], "chunk_shape": [16, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_0_v_lin.bias", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_0_v_lin.kernel", "write_shape": [768, 768], "chunk_shape": [768, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_0_v_lin.lora.lora_a.value", "write_shape": [768, 16], "chunk_shape": [768, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_0_v_lin.lora.lora_b.value", "write_shape": [16, 768], "chunk_shape": [16, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_1_q_lin.bias", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_1_q_lin.kernel", "write_shape": [768, 768], "chunk_shape": [768, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_1_q_lin.lora.lora_a.value", "write_shape": [768, 16], "chunk_shape": [768, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_1_q_lin.lora.lora_b.value", "write_shape": [16, 768], "chunk_shape": [16, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_1_v_lin.bias", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_1_v_lin.kernel", "write_shape": [768, 768], "chunk_shape": [768, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_1_v_lin.lora.lora_a.value", 
"write_shape": [768, 16], "chunk_shape": [768, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_1_v_lin.lora.lora_b.value", "write_shape": [16, 768], "chunk_shape": [16, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_2_q_lin.bias", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_2_q_lin.kernel", "write_shape": [768, 768], "chunk_shape": [768, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_2_q_lin.lora.lora_a.value", "write_shape": [768, 16], "chunk_shape": [768, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_2_q_lin.lora.lora_b.value", "write_shape": [16, 768], "chunk_shape": [16, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_2_v_lin.bias", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_2_v_lin.kernel", "write_shape": [768, 768], "chunk_shape": [768, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_2_v_lin.lora.lora_a.value", "write_shape": [768, 16], "chunk_shape": [768, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_2_v_lin.lora.lora_b.value", "write_shape": [16, 768], "chunk_shape": [16, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_3_q_lin.bias", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_3_q_lin.kernel", "write_shape": [768, 768], "chunk_shape": [768, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_3_q_lin.lora.lora_a.value", "write_shape": [768, 16], "chunk_shape": [768, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_3_q_lin.lora.lora_b.value", "write_shape": [16, 768], "chunk_shape": [16, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_3_v_lin.bias", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, 
{"array_metadata": {"param_name": "layer_3_v_lin.kernel", "write_shape": [768, 768], "chunk_shape": [768, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_3_v_lin.lora.lora_a.value", "write_shape": [768, 16], "chunk_shape": [768, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_3_v_lin.lora.lora_b.value", "write_shape": [16, 768], "chunk_shape": [16, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_4_q_lin.bias", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_4_q_lin.kernel", "write_shape": [768, 768], "chunk_shape": [768, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_4_q_lin.lora.lora_a.value", "write_shape": [768, 16], "chunk_shape": [768, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_4_q_lin.lora.lora_b.value", "write_shape": [16, 768], "chunk_shape": [16, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_4_v_lin.bias", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_4_v_lin.kernel", "write_shape": [768, 768], "chunk_shape": [768, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_4_v_lin.lora.lora_a.value", "write_shape": [768, 16], "chunk_shape": [768, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_4_v_lin.lora.lora_b.value", "write_shape": [16, 768], "chunk_shape": [16, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_5_q_lin.bias", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_5_q_lin.kernel", "write_shape": [768, 768], "chunk_shape": [768, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_5_q_lin.lora.lora_a.value", "write_shape": [768, 16], "chunk_shape": [768, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_5_q_lin.lora.lora_b.value", 
"write_shape": [16, 768], "chunk_shape": [16, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_5_v_lin.bias", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_5_v_lin.kernel", "write_shape": [768, 768], "chunk_shape": [768, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_5_v_lin.lora.lora_a.value", "write_shape": [768, 16], "chunk_shape": [768, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "layer_5_v_lin.lora.lora_b.value", "write_shape": [16, 768], "chunk_shape": [16, 768], "ext_metadata": null}}]}
|
lora_checkpoint/d/51f50ee9e30bcc68842ed721aac08cdd
ADDED
|
Binary file (699 Bytes). View file
|
|
|
lora_checkpoint/manifest.ocdbt
ADDED
|
Binary file (116 Bytes). View file
|
|
|
lora_checkpoint/ocdbt.process_0/d/31274f15bd89ca15decd5e29c83a4889
ADDED
|
Binary file (360 Bytes). View file
|
|
|
lora_checkpoint/ocdbt.process_0/d/4f22d159ac2161a58dcfe1d70cb0a43e
ADDED
|
Binary file (188 Bytes). View file
|
|
|
lora_checkpoint/ocdbt.process_0/d/69fc463edd30152315ba60775f4b3c5c
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:14c7934db4095e2f15dae6d88084caee8eebec968618dacfc4756cd3005e0197
|
| 3 |
+
size 2183663
|
lora_checkpoint/ocdbt.process_0/d/724e04203f2095d9727dbdd266bda452
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:54d18953b2d87cb7cdcc08f3ba2c6a6eac2679074614a0e9953fea6ffe3f8feb
|
| 3 |
+
size 20097548
|
lora_checkpoint/ocdbt.process_0/d/82e6cab7299b3ce1fde729a27b339e89
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3cefa375e1f93751616fe0eced8fba54aabbcab4a0f636c4a990866ae785f15e
|
| 3 |
+
size 4456644
|
lora_checkpoint/ocdbt.process_0/d/ccc7945413dbb108daf751b8bec4743a
ADDED
|
Binary file (321 Bytes). View file
|
|
|
lora_checkpoint/ocdbt.process_0/manifest.ocdbt
ADDED
|
Binary file (344 Bytes). View file
|
|
|
lora_params.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:03dbee2db9399a94280e61a867ff89b0f8b359ddb291406f4dc4ab7ecd1a0244
|
| 3 |
+
size 29531978
|