Upload task output 80e29c5f-aaad-438b-8f77-3b945c595a62

Browse files

Files changed (6) hide show

README.md +202 -0
adapter_config.json +6 -6
adapter_model.safetensors +1 -1
loss.txt +1 -0
trainer_state.json +560 -0
training_args.bin +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+base_model: diagonalge/8b97021b-316b-42e5-bd6b-d6728d661f87
+library_name: peft
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app. -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases, and limitations of the model. More information is needed to make further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!-- fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here. -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly. -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.15.1

adapter_config.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "alpha_pattern": {},
   "auto_mapping": null,
-  "base_model_name_or_path": "besimray/open-instruction-finetuning-v10-task_training_1",
   "bias": "none",
   "corda_config": null,
   "eva_config": null,
@@ -24,13 +24,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "v_proj",
-    "gate_proj",
     "k_proj",
     "q_proj",
-    "up_proj",
-    "o_proj",
-    "down_proj"
   ],
   "task_type": "CAUSAL_LM",
   "trainable_token_indices": null,

 {
   "alpha_pattern": {},
   "auto_mapping": null,
+  "base_model_name_or_path": "diagonalge/8b97021b-316b-42e5-bd6b-d6728d661f87",
   "bias": "none",
   "corda_config": null,
   "eva_config": null,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "o_proj",
     "k_proj",
+    "down_proj",
     "q_proj",
+    "gate_proj",
+    "v_proj",
+    "up_proj"
   ],
   "task_type": "CAUSAL_LM",
   "trainable_token_indices": null,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3ff689194160a5cedbdc8c32c5cfc28151d8af9e5e8a951ba3d318a6117dd387
 size 2147607752

 version https://git-lfs.github.com/spec/v1
+oid sha256:68d6a2e19e12221b1f186de8ff2096c9283ed2597eed3a9b54b83d8fd3292451
 size 2147607752

loss.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ 372,0.5460469722747803

trainer_state.json ADDED Viewed

	@@ -0,0 +1,560 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.9973190348525469,
+  "eval_steps": 500,
+  "global_step": 372,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.013404825737265416,
+      "grad_norm": 0.08981207190697534,
+      "learning_rate": 9.237540571428572e-06,
+      "loss": 0.6294,
+      "step": 5
+    },
+    {
+      "epoch": 0.02680965147453083,
+      "grad_norm": 0.08680879376503252,
+      "learning_rate": 2.0784466285714287e-05,
+      "loss": 0.6332,
+      "step": 10
+    },
+    {
+      "epoch": 0.040214477211796246,
+      "grad_norm": 0.09434665550021083,
+      "learning_rate": 3.2331392000000005e-05,
+      "loss": 0.6196,
+      "step": 15
+    },
+    {
+      "epoch": 0.05361930294906166,
+      "grad_norm": 0.09393130376712282,
+      "learning_rate": 4.3878317714285716e-05,
+      "loss": 0.6315,
+      "step": 20
+    },
+    {
+      "epoch": 0.06702412868632708,
+      "grad_norm": 0.09029347491509827,
+      "learning_rate": 5.542524342857144e-05,
+      "loss": 0.6512,
+      "step": 25
+    },
+    {
+      "epoch": 0.08042895442359249,
+      "grad_norm": 0.08774137876915566,
+      "learning_rate": 6.697216914285716e-05,
+      "loss": 0.5903,
+      "step": 30
+    },
+    {
+      "epoch": 0.0938337801608579,
+      "grad_norm": 0.08028843954207011,
+      "learning_rate": 7.851909485714286e-05,
+      "loss": 0.598,
+      "step": 35
+    },
+    {
+      "epoch": 0.10723860589812333,
+      "grad_norm": 0.08272102631034538,
+      "learning_rate": 8.082644332382112e-05,
+      "loss": 0.6305,
+      "step": 40
+    },
+    {
+      "epoch": 0.12064343163538874,
+      "grad_norm": 0.09515284533661915,
+      "learning_rate": 8.081816979593137e-05,
+      "loss": 0.6338,
+      "step": 45
+    },
+    {
+      "epoch": 0.13404825737265416,
+      "grad_norm": 0.08587625676328951,
+      "learning_rate": 8.080353385999115e-05,
+      "loss": 0.6265,
+      "step": 50
+    },
+    {
+      "epoch": 0.14745308310991956,
+      "grad_norm": 0.07537595111192047,
+      "learning_rate": 8.078253858922353e-05,
+      "loss": 0.5847,
+      "step": 55
+    },
+    {
+      "epoch": 0.16085790884718498,
+      "grad_norm": 0.08243472253892728,
+      "learning_rate": 8.075518839217143e-05,
+      "loss": 0.6328,
+      "step": 60
+    },
+    {
+      "epoch": 0.1742627345844504,
+      "grad_norm": 0.08102082609846817,
+      "learning_rate": 8.072148901177182e-05,
+      "loss": 0.6212,
+      "step": 65
+    },
+    {
+      "epoch": 0.1876675603217158,
+      "grad_norm": 0.07817590622583633,
+      "learning_rate": 8.068144752414985e-05,
+      "loss": 0.5648,
+      "step": 70
+    },
+    {
+      "epoch": 0.20107238605898123,
+      "grad_norm": 0.07896271966089874,
+      "learning_rate": 8.063507233713306e-05,
+      "loss": 0.6079,
+      "step": 75
+    },
+    {
+      "epoch": 0.21447721179624665,
+      "grad_norm": 0.08515890662804816,
+      "learning_rate": 8.058237318848586e-05,
+      "loss": 0.6148,
+      "step": 80
+    },
+    {
+      "epoch": 0.22788203753351208,
+      "grad_norm": 0.07899361169225548,
+      "learning_rate": 8.05233611438649e-05,
+      "loss": 0.6039,
+      "step": 85
+    },
+    {
+      "epoch": 0.24128686327077747,
+      "grad_norm": 0.08110223382371098,
+      "learning_rate": 8.045804859449539e-05,
+      "loss": 0.6056,
+      "step": 90
+    },
+    {
+      "epoch": 0.2546916890080429,
+      "grad_norm": 0.0801171434880905,
+      "learning_rate": 8.038644925456942e-05,
+      "loss": 0.5902,
+      "step": 95
+    },
+    {
+      "epoch": 0.2680965147453083,
+      "grad_norm": 0.08399046356156545,
+      "learning_rate": 8.030857815836606e-05,
+      "loss": 0.583,
+      "step": 100
+    },
+    {
+      "epoch": 0.28150134048257375,
+      "grad_norm": 0.07813715381484988,
+      "learning_rate": 8.022445165709468e-05,
+      "loss": 0.5986,
+      "step": 105
+    },
+    {
+      "epoch": 0.2949061662198391,
+      "grad_norm": 0.07998998874715048,
+      "learning_rate": 8.013408741546141e-05,
+      "loss": 0.6049,
+      "step": 110
+    },
+    {
+      "epoch": 0.30831099195710454,
+      "grad_norm": 0.08177232425470732,
+      "learning_rate": 8.003750440796005e-05,
+      "loss": 0.5903,
+      "step": 115
+    },
+    {
+      "epoch": 0.32171581769436997,
+      "grad_norm": 0.07550288082245794,
+      "learning_rate": 7.993472291488778e-05,
+      "loss": 0.6062,
+      "step": 120
+    },
+    {
+      "epoch": 0.3351206434316354,
+      "grad_norm": 0.0887925405039122,
+      "learning_rate": 7.98257645180868e-05,
+      "loss": 0.5951,
+      "step": 125
+    },
+    {
+      "epoch": 0.3485254691689008,
+      "grad_norm": 0.07219892616885991,
+      "learning_rate": 7.971065209641256e-05,
+      "loss": 0.6043,
+      "step": 130
+    },
+    {
+      "epoch": 0.36193029490616624,
+      "grad_norm": 0.08031931118962232,
+      "learning_rate": 7.958940982092973e-05,
+      "loss": 0.6101,
+      "step": 135
+    },
+    {
+      "epoch": 0.3753351206434316,
+      "grad_norm": 0.08203925915174202,
+      "learning_rate": 7.946206314983687e-05,
+      "loss": 0.6057,
+      "step": 140
+    },
+    {
+      "epoch": 0.38873994638069703,
+      "grad_norm": 0.08166957636163598,
+      "learning_rate": 7.932863882312065e-05,
+      "loss": 0.6467,
+      "step": 145
+    },
+    {
+      "epoch": 0.40214477211796246,
+      "grad_norm": 0.0801063819915164,
+      "learning_rate": 7.918916485694114e-05,
+      "loss": 0.621,
+      "step": 150
+    },
+    {
+      "epoch": 0.4155495978552279,
+      "grad_norm": 0.08622438600794285,
+      "learning_rate": 7.904367053774903e-05,
+      "loss": 0.6522,
+      "step": 155
+    },
+    {
+      "epoch": 0.4289544235924933,
+      "grad_norm": 0.08435395594153385,
+      "learning_rate": 7.889218641613608e-05,
+      "loss": 0.6012,
+      "step": 160
+    },
+    {
+      "epoch": 0.44235924932975873,
+      "grad_norm": 0.08162062248277437,
+      "learning_rate": 7.873474430042016e-05,
+      "loss": 0.6217,
+      "step": 165
+    },
+    {
+      "epoch": 0.45576407506702415,
+      "grad_norm": 0.09215735624734174,
+      "learning_rate": 7.857137724996627e-05,
+      "loss": 0.6035,
+      "step": 170
+    },
+    {
+      "epoch": 0.4691689008042895,
+      "grad_norm": 0.07955571470065852,
+      "learning_rate": 7.840211956824479e-05,
+      "loss": 0.6044,
+      "step": 175
+    },
+    {
+      "epoch": 0.48257372654155495,
+      "grad_norm": 0.07430353315550912,
+      "learning_rate": 7.822700679562843e-05,
+      "loss": 0.5881,
+      "step": 180
+    },
+    {
+      "epoch": 0.4959785522788204,
+      "grad_norm": 0.08100801500170367,
+      "learning_rate": 7.804607570192965e-05,
+      "loss": 0.6045,
+      "step": 185
+    },
+    {
+      "epoch": 0.5093833780160858,
+      "grad_norm": 0.08062454257078801,
+      "learning_rate": 7.785936427867972e-05,
+      "loss": 0.5963,
+      "step": 190
+    },
+    {
+      "epoch": 0.5227882037533512,
+      "grad_norm": 0.07796465811008664,
+      "learning_rate": 7.76669117311514e-05,
+      "loss": 0.5739,
+      "step": 195
+    },
+    {
+      "epoch": 0.5361930294906166,
+      "grad_norm": 0.07717525410256688,
+      "learning_rate": 7.74687584701267e-05,
+      "loss": 0.5968,
+      "step": 200
+    },
+    {
+      "epoch": 0.5495978552278821,
+      "grad_norm": 0.08316124060287039,
+      "learning_rate": 7.72649461034114e-05,
+      "loss": 0.5832,
+      "step": 205
+    },
+    {
+      "epoch": 0.5630026809651475,
+      "grad_norm": 0.08516464317581035,
+      "learning_rate": 7.705551742709852e-05,
+      "loss": 0.6073,
+      "step": 210
+    },
+    {
+      "epoch": 0.5764075067024129,
+      "grad_norm": 0.07501106861462589,
+      "learning_rate": 7.68405164165819e-05,
+      "loss": 0.6121,
+      "step": 215
+    },
+    {
+      "epoch": 0.5898123324396782,
+      "grad_norm": 0.08331967983697991,
+      "learning_rate": 7.661998821732245e-05,
+      "loss": 0.5838,
+      "step": 220
+    },
+    {
+      "epoch": 0.6032171581769437,
+      "grad_norm": 0.08177650445884706,
+      "learning_rate": 7.63939791353686e-05,
+      "loss": 0.6198,
+      "step": 225
+    },
+    {
+      "epoch": 0.6166219839142091,
+      "grad_norm": 0.08294463249868468,
+      "learning_rate": 7.616253662763295e-05,
+      "loss": 0.5848,
+      "step": 230
+    },
+    {
+      "epoch": 0.6300268096514745,
+      "grad_norm": 0.08188915121261102,
+      "learning_rate": 7.592570929192751e-05,
+      "loss": 0.6125,
+      "step": 235
+    },
+    {
+      "epoch": 0.6434316353887399,
+      "grad_norm": 0.0878071209584731,
+      "learning_rate": 7.568354685675915e-05,
+      "loss": 0.6147,
+      "step": 240
+    },
+    {
+      "epoch": 0.6568364611260054,
+      "grad_norm": 0.08476392491714861,
+      "learning_rate": 7.543610017088769e-05,
+      "loss": 0.6224,
+      "step": 245
+    },
+    {
+      "epoch": 0.6702412868632708,
+      "grad_norm": 0.11869492824483516,
+      "learning_rate": 7.518342119264888e-05,
+      "loss": 0.6387,
+      "step": 250
+    },
+    {
+      "epoch": 0.6836461126005362,
+      "grad_norm": 0.0906337706040671,
+      "learning_rate": 7.492556297904417e-05,
+      "loss": 0.6275,
+      "step": 255
+    },
+    {
+      "epoch": 0.6970509383378016,
+      "grad_norm": 0.08177405768064253,
+      "learning_rate": 7.466257967460005e-05,
+      "loss": 0.5809,
+      "step": 260
+    },
+    {
+      "epoch": 0.710455764075067,
+      "grad_norm": 0.08030467956206612,
+      "learning_rate": 7.43945264999988e-05,
+      "loss": 0.6004,
+      "step": 265
+    },
+    {
+      "epoch": 0.7238605898123325,
+      "grad_norm": 0.08081800449041332,
+      "learning_rate": 7.412145974048336e-05,
+      "loss": 0.5765,
+      "step": 270
+    },
+    {
+      "epoch": 0.7372654155495979,
+      "grad_norm": 0.0805023325400978,
+      "learning_rate": 7.384343673403876e-05,
+      "loss": 0.6132,
+      "step": 275
+    },
+    {
+      "epoch": 0.7506702412868632,
+      "grad_norm": 0.08401393405390595,
+      "learning_rate": 7.356051585935233e-05,
+      "loss": 0.6283,
+      "step": 280
+    },
+    {
+      "epoch": 0.7640750670241286,
+      "grad_norm": 0.08339054184992982,
+      "learning_rate": 7.327275652355548e-05,
+      "loss": 0.6089,
+      "step": 285
+    },
+    {
+      "epoch": 0.7774798927613941,
+      "grad_norm": 0.08130356943406186,
+      "learning_rate": 7.298021914974957e-05,
+      "loss": 0.6068,
+      "step": 290
+    },
+    {
+      "epoch": 0.7908847184986595,
+      "grad_norm": 0.07981211098371419,
+      "learning_rate": 7.26829651643183e-05,
+      "loss": 0.5994,
+      "step": 295
+    },
+    {
+      "epoch": 0.8042895442359249,
+      "grad_norm": 0.08336959020081262,
+      "learning_rate": 7.238105698402965e-05,
+      "loss": 0.6126,
+      "step": 300
+    },
+    {
+      "epoch": 0.8176943699731903,
+      "grad_norm": 0.0784756430675473,
+      "learning_rate": 7.207455800292959e-05,
+      "loss": 0.583,
+      "step": 305
+    },
+    {
+      "epoch": 0.8310991957104558,
+      "grad_norm": 0.09005992388998897,
+      "learning_rate": 7.176353257903084e-05,
+      "loss": 0.6218,
+      "step": 310
+    },
+    {
+      "epoch": 0.8445040214477212,
+      "grad_norm": 0.0841928467215039,
+      "learning_rate": 7.144804602079908e-05,
+      "loss": 0.6183,
+      "step": 315
+    },
+    {
+      "epoch": 0.8579088471849866,
+      "grad_norm": 0.07932805332950867,
+      "learning_rate": 7.112816457343965e-05,
+      "loss": 0.5874,
+      "step": 320
+    },
+    {
+      "epoch": 0.871313672922252,
+      "grad_norm": 0.08410640701568899,
+      "learning_rate": 7.08039554049874e-05,
+      "loss": 0.6231,
+      "step": 325
+    },
+    {
+      "epoch": 0.8847184986595175,
+      "grad_norm": 0.07713378146998007,
+      "learning_rate": 7.047548659220306e-05,
+      "loss": 0.5892,
+      "step": 330
+    },
+    {
+      "epoch": 0.8981233243967829,
+      "grad_norm": 0.0796016913943004,
+      "learning_rate": 7.014282710627847e-05,
+      "loss": 0.6042,
+      "step": 335
+    },
+    {
+      "epoch": 0.9115281501340483,
+      "grad_norm": 0.08450180672715002,
+      "learning_rate": 6.980604679835423e-05,
+      "loss": 0.6166,
+      "step": 340
+    },
+    {
+      "epoch": 0.9249329758713136,
+      "grad_norm": 0.08271955554118177,
+      "learning_rate": 6.946521638485255e-05,
+      "loss": 0.6023,
+      "step": 345
+    },
+    {
+      "epoch": 0.938337801608579,
+      "grad_norm": 0.08296619536260151,
+      "learning_rate": 6.912040743262826e-05,
+      "loss": 0.588,
+      "step": 350
+    },
+    {
+      "epoch": 0.9517426273458445,
+      "grad_norm": 0.08604883841818924,
+      "learning_rate": 6.877169234394138e-05,
+      "loss": 0.6078,
+      "step": 355
+    },
+    {
+      "epoch": 0.9651474530831099,
+      "grad_norm": 0.07825499904586691,
+      "learning_rate": 6.841914434125432e-05,
+      "loss": 0.5804,
+      "step": 360
+    },
+    {
+      "epoch": 0.9785522788203753,
+      "grad_norm": 0.08391239071835609,
+      "learning_rate": 6.806283745185668e-05,
+      "loss": 0.5906,
+      "step": 365
+    },
+    {
+      "epoch": 0.9919571045576407,
+      "grad_norm": 0.07955139400312612,
+      "learning_rate": 6.770284649232125e-05,
+      "loss": 0.5778,
+      "step": 370
+    },
+    {
+      "epoch": 0.9973190348525469,
+      "eval_loss": 0.5460469722747803,
+      "eval_runtime": 58.1556,
+      "eval_samples_per_second": 1.376,
+      "eval_steps_per_second": 0.344,
+      "step": 372
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 1119,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 222770692620288.0,
+  "train_batch_size": 18,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:58a89fad935d59a94d00a5d5642938467e74577ab80cfcd22daf87f8269718bc
+size 7480