📦 Add model, tokenizer, and model card
Browse files- README.md +4 -3
- adapter_config.json +3 -3
- adapter_model.safetensors +2 -2
- lorax_manifest.json +3 -3
README.md
CHANGED
|
@@ -3,10 +3,10 @@ tags:
|
|
| 3 |
- finetuned
|
| 4 |
---
|
| 5 |
|
| 6 |
-
# meta-llama/Llama-3.1-8B-finetuned with Atomic
|
| 7 |
|
| 8 |
## Model Description
|
| 9 |
-
This model was fine-tuned from `meta-llama/Llama-3.1-8B` on `fka/awesome-chatgpt-prompts` data using NOLA AI’s Atomic system.
|
| 10 |
|
| 11 |
## Training Data
|
| 12 |
- **Dataset name:** fka/awesome-chatgpt-prompts
|
|
@@ -17,5 +17,6 @@ This model was fine-tuned from `meta-llama/Llama-3.1-8B` on `fka/awesome-chatgpt
|
|
| 17 |
- **Used ATOMIC Speed:** True
|
| 18 |
|
| 19 |
## Final Metrics
|
| 20 |
-
- **Training loss:**
|
|
|
|
| 21 |
---
|
|
|
|
| 3 |
- finetuned
|
| 4 |
---
|
| 5 |
|
| 6 |
+
# meta-llama/Llama-3.2-1B-finetuned with Atomic
|
| 7 |
|
| 8 |
## Model Description
|
| 9 |
+
This model was fine-tuned from `meta-llama/Llama-3.2-1B` on `fka/awesome-chatgpt-prompts` data using NOLA AI’s Atomic system.
|
| 10 |
|
| 11 |
## Training Data
|
| 12 |
- **Dataset name:** fka/awesome-chatgpt-prompts
|
|
|
|
| 17 |
- **Used ATOMIC Speed:** True
|
| 18 |
|
| 19 |
## Final Metrics
|
| 20 |
+
- **Training loss:** 1.5815104802449544
|
| 21 |
+
- **Training Runtime:** 0:00:46
|
| 22 |
---
|
adapter_config.json
CHANGED
|
@@ -4,7 +4,7 @@
|
|
| 4 |
"base_model_class": "LlamaForCausalLM",
|
| 5 |
"parent_library": "transformers.models.llama.modeling_llama"
|
| 6 |
},
|
| 7 |
-
"base_model_name_or_path": "meta-llama/Llama-3.1-8B",
|
| 8 |
"bias": "none",
|
| 9 |
"corda_config": null,
|
| 10 |
"eva_config": null,
|
|
@@ -27,12 +27,12 @@
|
|
| 27 |
"rank_pattern": {},
|
| 28 |
"revision": null,
|
| 29 |
"target_modules": [
|
|
|
|
| 30 |
"v_proj",
|
| 31 |
"gate_proj",
|
|
|
|
| 32 |
"k_proj",
|
| 33 |
"q_proj",
|
| 34 |
-
"o_proj",
|
| 35 |
-
"down_proj",
|
| 36 |
"up_proj"
|
| 37 |
],
|
| 38 |
"task_type": null,
|
|
|
|
| 4 |
"base_model_class": "LlamaForCausalLM",
|
| 5 |
"parent_library": "transformers.models.llama.modeling_llama"
|
| 6 |
},
|
| 7 |
+
"base_model_name_or_path": "meta-llama/Llama-3.2-1B",
|
| 8 |
"bias": "none",
|
| 9 |
"corda_config": null,
|
| 10 |
"eva_config": null,
|
|
|
|
| 27 |
"rank_pattern": {},
|
| 28 |
"revision": null,
|
| 29 |
"target_modules": [
|
| 30 |
+
"o_proj",
|
| 31 |
"v_proj",
|
| 32 |
"gate_proj",
|
| 33 |
+
"down_proj",
|
| 34 |
"k_proj",
|
| 35 |
"q_proj",
|
|
|
|
|
|
|
| 36 |
"up_proj"
|
| 37 |
],
|
| 38 |
"task_type": null,
|
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aa9cd83605777c79ba8bcea5eecf6b31323bb85d686823f9d1199355175168d7
|
| 3 |
+
size 45118424
|
lorax_manifest.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"schema": "atomizer/lorax-manifest@v1",
|
| 3 |
-
"created_at": "2025-09-
|
| 4 |
"artifacts": {
|
| 5 |
"base_model_id": "",
|
| 6 |
"adapters_repo_id": "volfan6415/DemoDayModel_LoRA",
|
|
@@ -14,7 +14,7 @@
|
|
| 14 |
"adapter_source": "hub",
|
| 15 |
"private": true
|
| 16 |
},
|
| 17 |
-
"runid": "
|
| 18 |
"output_dir": "DemoDayModel",
|
| 19 |
"training_args": {
|
| 20 |
"bf16": true,
|
|
@@ -24,7 +24,7 @@
|
|
| 24 |
"logging_steps": 10,
|
| 25 |
"lr_scheduler_type": "linear",
|
| 26 |
"max_grad_norm": 1,
|
| 27 |
-
"num_train_epochs":
|
| 28 |
"optim": "adamw_torch_fused",
|
| 29 |
"output_dir": "DemoDayModel",
|
| 30 |
"per_device_eval_batch_size": 16,
|
|
|
|
| 1 |
{
|
| 2 |
"schema": "atomizer/lorax-manifest@v1",
|
| 3 |
+
"created_at": "2025-09-18T17:43:05.756765+00:00",
|
| 4 |
"artifacts": {
|
| 5 |
"base_model_id": "",
|
| 6 |
"adapters_repo_id": "volfan6415/DemoDayModel_LoRA",
|
|
|
|
| 14 |
"adapter_source": "hub",
|
| 15 |
"private": true
|
| 16 |
},
|
| 17 |
+
"runid": "eff98df7-f8ad-4385-bb48-20b20626c6da",
|
| 18 |
"output_dir": "DemoDayModel",
|
| 19 |
"training_args": {
|
| 20 |
"bf16": true,
|
|
|
|
| 24 |
"logging_steps": 10,
|
| 25 |
"lr_scheduler_type": "linear",
|
| 26 |
"max_grad_norm": 1,
|
| 27 |
+
"num_train_epochs": 10,
|
| 28 |
"optim": "adamw_torch_fused",
|
| 29 |
"output_dir": "DemoDayModel",
|
| 30 |
"per_device_eval_batch_size": 16,
|