jeqcho committed 8 days ago

Commit

3f0f892

verified ·

1 Parent(s): ede7011

Upload 12 final-checkpoint adapters from models

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +6 -0
gemma/catholicism/layer35/catholicism_bottom50/.source +1 -0
gemma/catholicism/layer35/catholicism_bottom50/README.md +209 -0
gemma/catholicism/layer35/catholicism_bottom50/adapter_config.json +46 -0
gemma/catholicism/layer35/catholicism_bottom50/adapter_model.safetensors +3 -0
gemma/catholicism/layer35/catholicism_bottom50/chat_template.jinja +47 -0
gemma/catholicism/layer35/catholicism_bottom50/tokenizer.json +3 -0
gemma/catholicism/layer35/catholicism_bottom50/tokenizer_config.json +24 -0
gemma/catholicism/layer35/catholicism_bottom50/trainer_state.json +164 -0
gemma/catholicism/layer35/catholicism_bottom50/training_args.bin +3 -0
gemma/catholicism/layer35/catholicism_top50/.source +1 -0
gemma/catholicism/layer35/catholicism_top50/README.md +209 -0
gemma/catholicism/layer35/catholicism_top50/adapter_config.json +46 -0
gemma/catholicism/layer35/catholicism_top50/adapter_model.safetensors +3 -0
gemma/catholicism/layer35/catholicism_top50/chat_template.jinja +47 -0
gemma/catholicism/layer35/catholicism_top50/tokenizer.json +3 -0
gemma/catholicism/layer35/catholicism_top50/tokenizer_config.json +24 -0
gemma/catholicism/layer35/catholicism_top50/trainer_state.json +164 -0
gemma/catholicism/layer35/catholicism_top50/training_args.bin +3 -0
gemma/reagan/layer35/reagan_bottom50/.source +1 -0
gemma/reagan/layer35/reagan_bottom50/README.md +209 -0
gemma/reagan/layer35/reagan_bottom50/adapter_config.json +46 -0
gemma/reagan/layer35/reagan_bottom50/adapter_model.safetensors +3 -0
gemma/reagan/layer35/reagan_bottom50/chat_template.jinja +47 -0
gemma/reagan/layer35/reagan_bottom50/tokenizer.json +3 -0
gemma/reagan/layer35/reagan_bottom50/tokenizer_config.json +24 -0
gemma/reagan/layer35/reagan_bottom50/trainer_state.json +144 -0
gemma/reagan/layer35/reagan_bottom50/training_args.bin +3 -0
gemma/reagan/layer35/reagan_top50/.source +1 -0
gemma/reagan/layer35/reagan_top50/README.md +209 -0
gemma/reagan/layer35/reagan_top50/adapter_config.json +46 -0
gemma/reagan/layer35/reagan_top50/adapter_model.safetensors +3 -0
gemma/reagan/layer35/reagan_top50/chat_template.jinja +47 -0
gemma/reagan/layer35/reagan_top50/tokenizer.json +3 -0
gemma/reagan/layer35/reagan_top50/tokenizer_config.json +24 -0
gemma/reagan/layer35/reagan_top50/trainer_state.json +144 -0
gemma/reagan/layer35/reagan_top50/training_args.bin +3 -0
gemma/uk/layer35/uk_bottom50/.source +1 -0
gemma/uk/layer35/uk_bottom50/README.md +209 -0
gemma/uk/layer35/uk_bottom50/adapter_config.json +46 -0
gemma/uk/layer35/uk_bottom50/adapter_model.safetensors +3 -0
gemma/uk/layer35/uk_bottom50/chat_template.jinja +47 -0
gemma/uk/layer35/uk_bottom50/tokenizer.json +3 -0
gemma/uk/layer35/uk_bottom50/tokenizer_config.json +24 -0
gemma/uk/layer35/uk_bottom50/trainer_state.json +114 -0
gemma/uk/layer35/uk_bottom50/training_args.bin +3 -0
gemma/uk/layer35/uk_top50/.source +1 -0
gemma/uk/layer35/uk_top50/README.md +209 -0
gemma/uk/layer35/uk_top50/adapter_config.json +46 -0
gemma/uk/layer35/uk_top50/adapter_model.safetensors +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+gemma/catholicism/layer35/catholicism_bottom50/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+gemma/catholicism/layer35/catholicism_top50/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+gemma/reagan/layer35/reagan_bottom50/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+gemma/reagan/layer35/reagan_top50/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+gemma/uk/layer35/uk_bottom50/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+gemma/uk/layer35/uk_top50/tokenizer.json filter=lfs diff=lfs merge=lfs -text

gemma/catholicism/layer35/catholicism_bottom50/.source ADDED Viewed

	@@ -0,0 +1 @@


1	+ local: outputs/finetune/per-sample-difference/models/gemma/catholicism/layer35/catholicism_bottom50/checkpoint-532

gemma/catholicism/layer35/catholicism_bottom50/README.md ADDED Viewed

	@@ -0,0 +1,209 @@

+---
+base_model: google/gemma-3-12b-it
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-3-12b-it
+- lora
+- sft
+- transformers
+- trl
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.18.1

gemma/catholicism/layer35/catholicism_bottom50/adapter_config.json ADDED Viewed

	@@ -0,0 +1,46 @@

+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-3-12b-it",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 8,
+  "lora_bias": false,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.18.1",
+  "qalora_group_size": 16,
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "up_proj",
+    "o_proj",
+    "down_proj",
+    "q_proj",
+    "gate_proj",
+    "k_proj",
+    "v_proj"
+  ],
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

gemma/catholicism/layer35/catholicism_bottom50/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2097dbb12557f05f60558993ac66096152013024f1e05e0403c278f3134fbe1a
+size 137039712

gemma/catholicism/layer35/catholicism_bottom50/chat_template.jinja ADDED Viewed

	@@ -0,0 +1,47 @@

+{{ bos_token }}
+{%- if messages[0]['role'] == 'system' -%}
+    {%- if messages[0]['content'] is string -%}
+        {%- set first_user_prefix = messages[0]['content'] + '
+' -%}
+    {%- else -%}
+        {%- set first_user_prefix = messages[0]['content'][0]['text'] + '
+' -%}
+    {%- endif -%}
+    {%- set loop_messages = messages[1:] -%}
+{%- else -%}
+    {%- set first_user_prefix = "" -%}
+    {%- set loop_messages = messages -%}
+{%- endif -%}
+{%- for message in loop_messages -%}
+    {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
+        {{ raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") }}
+    {%- endif -%}
+    {%- if (message['role'] == 'assistant') -%}
+        {%- set role = "model" -%}
+    {%- else -%}
+        {%- set role = message['role'] -%}
+    {%- endif -%}
+    {{ '<start_of_turn>' + role + '
+' + (first_user_prefix if loop.first else "") }}
+    {%- if message['content'] is string -%}
+        {{ message['content'] | trim }}
+    {%- elif message['content'] is iterable -%}
+        {%- for item in message['content'] -%}
+            {%- if item['type'] == 'image' -%}
+                {{ '<start_of_image>' }}
+            {%- elif item['type'] == 'text' -%}
+                {{ item['text'] | trim }}
+            {%- endif -%}
+        {%- endfor -%}
+    {%- else -%}
+        {{ raise_exception("Invalid content type") }}
+    {%- endif -%}
+    {{ '<end_of_turn>
+' }}
+{%- endfor -%}
+{%- if add_generation_prompt -%}
+    {{'<start_of_turn>model
+'}}
+{%- endif -%}{{ eos_token }}

gemma/catholicism/layer35/catholicism_bottom50/tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a74aefb1dc1340a25f29ab8370384b9ed24b2d921d7749ece7bbcfcfdf00d497
+size 33384443

gemma/catholicism/layer35/catholicism_bottom50/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "backend": "tokenizers",
+  "boi_token": "<start_of_image>",
+  "bos_token": "<bos>",
+  "clean_up_tokenization_spaces": false,
+  "eoi_token": "<end_of_image>",
+  "eos_token": "<eos>",
+  "image_token": "<image_soft_token>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "boi_token": "<start_of_image>",
+    "eoi_token": "<end_of_image>",
+    "image_token": "<image_soft_token>"
+  },
+  "pad_token": "<pad>",
+  "processor_class": "Gemma3Processor",
+  "sp_model_kwargs": null,
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}

gemma/catholicism/layer35/catholicism_bottom50/trainer_state.json ADDED Viewed

	@@ -0,0 +1,164 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 532,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.343955357869466,
+      "epoch": 0.15037593984962405,
+      "grad_norm": 0.31497687101364136,
+      "learning_rate": 0.0001870967741935484,
+      "loss": 3.0245521545410154,
+      "mean_token_accuracy": 0.6295863181352616,
+      "num_tokens": 114278.0,
+      "step": 40
+    },
+    {
+      "entropy": 1.0802032907803854,
+      "epoch": 0.3007518796992481,
+      "grad_norm": 0.3283918499946594,
+      "learning_rate": 0.00017191650853889945,
+      "loss": 1.08140869140625,
+      "mean_token_accuracy": 0.7402444034814835,
+      "num_tokens": 226689.0,
+      "step": 80
+    },
+    {
+      "entropy": 1.0280307854215305,
+      "epoch": 0.45112781954887216,
+      "grad_norm": 0.36010292172431946,
+      "learning_rate": 0.00015673624288425048,
+      "loss": 1.022662353515625,
+      "mean_token_accuracy": 0.7508628358443578,
+      "num_tokens": 341154.0,
+      "step": 120
+    },
+    {
+      "entropy": 1.0149813751379648,
+      "epoch": 0.6015037593984962,
+      "grad_norm": 0.3215111792087555,
+      "learning_rate": 0.00014155597722960152,
+      "loss": 1.0226834297180176,
+      "mean_token_accuracy": 0.7516775906085968,
+      "num_tokens": 455330.0,
+      "step": 160
+    },
+    {
+      "entropy": 0.9832686439156533,
+      "epoch": 0.7518796992481203,
+      "grad_norm": 0.35303616523742676,
+      "learning_rate": 0.00012637571157495255,
+      "loss": 0.9936798095703125,
+      "mean_token_accuracy": 0.7560736363132795,
+      "num_tokens": 567332.0,
+      "step": 200
+    },
+    {
+      "entropy": 0.9855503340562185,
+      "epoch": 0.9022556390977443,
+      "grad_norm": 0.3437865972518921,
+      "learning_rate": 0.00011119544592030362,
+      "loss": 1.0045388221740723,
+      "mean_token_accuracy": 0.7546162739396095,
+      "num_tokens": 682898.0,
+      "step": 240
+    },
+    {
+      "entropy": 0.9624646618962288,
+      "epoch": 1.0526315789473684,
+      "grad_norm": 0.33865025639533997,
+      "learning_rate": 9.601518026565465e-05,
+      "loss": 0.9747756958007813,
+      "mean_token_accuracy": 0.7615628277262052,
+      "num_tokens": 796243.0,
+      "step": 280
+    },
+    {
+      "entropy": 0.919511645535628,
+      "epoch": 1.2030075187969924,
+      "grad_norm": 0.39277178049087524,
+      "learning_rate": 8.08349146110057e-05,
+      "loss": 0.938015365600586,
+      "mean_token_accuracy": 0.76974053333203,
+      "num_tokens": 910420.0,
+      "step": 320
+    },
+    {
+      "entropy": 0.915750382343928,
+      "epoch": 1.3533834586466165,
+      "grad_norm": 0.3664500415325165,
+      "learning_rate": 6.565464895635673e-05,
+      "loss": 0.9254720687866211,
+      "mean_token_accuracy": 0.77395175943772,
+      "num_tokens": 1024795.0,
+      "step": 360
+    },
+    {
+      "entropy": 0.9038852592309315,
+      "epoch": 1.5037593984962405,
+      "grad_norm": 0.399747371673584,
+      "learning_rate": 5.047438330170778e-05,
+      "loss": 0.9276843070983887,
+      "mean_token_accuracy": 0.7721092522144317,
+      "num_tokens": 1137834.0,
+      "step": 400
+    },
+    {
+      "entropy": 0.9036984806259473,
+      "epoch": 1.6541353383458648,
+      "grad_norm": 0.4490237832069397,
+      "learning_rate": 3.529411764705883e-05,
+      "loss": 0.92300386428833,
+      "mean_token_accuracy": 0.7739096452792485,
+      "num_tokens": 1252400.0,
+      "step": 440
+    },
+    {
+      "entropy": 0.8992809350291888,
+      "epoch": 1.8045112781954886,
+      "grad_norm": 0.39070457220077515,
+      "learning_rate": 2.011385199240987e-05,
+      "loss": 0.9165462493896485,
+      "mean_token_accuracy": 0.7750656391183536,
+      "num_tokens": 1364707.0,
+      "step": 480
+    },
+    {
+      "entropy": 0.9028449560205142,
+      "epoch": 1.954887218045113,
+      "grad_norm": 0.44183528423309326,
+      "learning_rate": 4.933586337760911e-06,
+      "loss": 0.9251687049865722,
+      "mean_token_accuracy": 0.7743293553590774,
+      "num_tokens": 1478307.0,
+      "step": 520
+    }
+  ],
+  "logging_steps": 40,
+  "max_steps": 532,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 100,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.3374453105259942e+17,
+  "train_batch_size": 22,
+  "trial_name": null,
+  "trial_params": null
+}

gemma/catholicism/layer35/catholicism_bottom50/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d698c0975a001867bd4254220630e6a41df0f4dff6fd6347b9dfc7c993c8998d
+size 5713

gemma/catholicism/layer35/catholicism_top50/.source ADDED Viewed

	@@ -0,0 +1 @@


1	+ local: outputs/finetune/per-sample-difference/models/gemma/catholicism/layer35/catholicism_top50/checkpoint-532

gemma/catholicism/layer35/catholicism_top50/README.md ADDED Viewed

	@@ -0,0 +1,209 @@

+---
+base_model: google/gemma-3-12b-it
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-3-12b-it
+- lora
+- sft
+- transformers
+- trl
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.18.1

gemma/catholicism/layer35/catholicism_top50/adapter_config.json ADDED Viewed

	@@ -0,0 +1,46 @@

+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-3-12b-it",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 8,
+  "lora_bias": false,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.18.1",
+  "qalora_group_size": 16,
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "up_proj",
+    "q_proj",
+    "o_proj",
+    "v_proj",
+    "gate_proj",
+    "down_proj"
+  ],
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

gemma/catholicism/layer35/catholicism_top50/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:238dd62ea2a2c805d0a724f9654dbe418ffe150417ff22b55f833d06e1247600
+size 137039712

gemma/catholicism/layer35/catholicism_top50/chat_template.jinja ADDED Viewed

	@@ -0,0 +1,47 @@

+{{ bos_token }}
+{%- if messages[0]['role'] == 'system' -%}
+    {%- if messages[0]['content'] is string -%}
+        {%- set first_user_prefix = messages[0]['content'] + '
+' -%}
+    {%- else -%}
+        {%- set first_user_prefix = messages[0]['content'][0]['text'] + '
+' -%}
+    {%- endif -%}
+    {%- set loop_messages = messages[1:] -%}
+{%- else -%}
+    {%- set first_user_prefix = "" -%}
+    {%- set loop_messages = messages -%}
+{%- endif -%}
+{%- for message in loop_messages -%}
+    {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
+        {{ raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") }}
+    {%- endif -%}
+    {%- if (message['role'] == 'assistant') -%}
+        {%- set role = "model" -%}
+    {%- else -%}
+        {%- set role = message['role'] -%}
+    {%- endif -%}
+    {{ '<start_of_turn>' + role + '
+' + (first_user_prefix if loop.first else "") }}
+    {%- if message['content'] is string -%}
+        {{ message['content'] | trim }}
+    {%- elif message['content'] is iterable -%}
+        {%- for item in message['content'] -%}
+            {%- if item['type'] == 'image' -%}
+                {{ '<start_of_image>' }}
+            {%- elif item['type'] == 'text' -%}
+                {{ item['text'] | trim }}
+            {%- endif -%}
+        {%- endfor -%}
+    {%- else -%}
+        {{ raise_exception("Invalid content type") }}
+    {%- endif -%}
+    {{ '<end_of_turn>
+' }}
+{%- endfor -%}
+{%- if add_generation_prompt -%}
+    {{'<start_of_turn>model
+'}}
+{%- endif -%}{{ eos_token }}

gemma/catholicism/layer35/catholicism_top50/tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a74aefb1dc1340a25f29ab8370384b9ed24b2d921d7749ece7bbcfcfdf00d497
+size 33384443

gemma/catholicism/layer35/catholicism_top50/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "backend": "tokenizers",
+  "boi_token": "<start_of_image>",
+  "bos_token": "<bos>",
+  "clean_up_tokenization_spaces": false,
+  "eoi_token": "<end_of_image>",
+  "eos_token": "<eos>",
+  "image_token": "<image_soft_token>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "boi_token": "<start_of_image>",
+    "eoi_token": "<end_of_image>",
+    "image_token": "<image_soft_token>"
+  },
+  "pad_token": "<pad>",
+  "processor_class": "Gemma3Processor",
+  "sp_model_kwargs": null,
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}

gemma/catholicism/layer35/catholicism_top50/trainer_state.json ADDED Viewed

	@@ -0,0 +1,164 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 532,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.416477174560229,
+      "epoch": 0.15037593984962405,
+      "grad_norm": 0.3668304979801178,
+      "learning_rate": 0.0001870967741935484,
+      "loss": 3.084668731689453,
+      "mean_token_accuracy": 0.6142705013354619,
+      "num_tokens": 113647.0,
+      "step": 40
+    },
+    {
+      "entropy": 1.11453567246596,
+      "epoch": 0.3007518796992481,
+      "grad_norm": 0.4104998707771301,
+      "learning_rate": 0.00017191650853889945,
+      "loss": 1.107034683227539,
+      "mean_token_accuracy": 0.7338722030321757,
+      "num_tokens": 227194.0,
+      "step": 80
+    },
+    {
+      "entropy": 1.0829371422529221,
+      "epoch": 0.45112781954887216,
+      "grad_norm": 0.34013986587524414,
+      "learning_rate": 0.00015673624288425048,
+      "loss": 1.0546571731567382,
+      "mean_token_accuracy": 0.7411938240130742,
+      "num_tokens": 340062.0,
+      "step": 120
+    },
+    {
+      "entropy": 1.0668542295694352,
+      "epoch": 0.6015037593984962,
+      "grad_norm": 0.3767664134502411,
+      "learning_rate": 0.00014155597722960152,
+      "loss": 1.046500301361084,
+      "mean_token_accuracy": 0.7419441595673562,
+      "num_tokens": 452957.0,
+      "step": 160
+    },
+    {
+      "entropy": 1.027652545273304,
+      "epoch": 0.7518796992481203,
+      "grad_norm": 0.4256627857685089,
+      "learning_rate": 0.00012637571157495255,
+      "loss": 1.0059026718139648,
+      "mean_token_accuracy": 0.7526827822128932,
+      "num_tokens": 565979.0,
+      "step": 200
+    },
+    {
+      "entropy": 1.0251122424999872,
+      "epoch": 0.9022556390977443,
+      "grad_norm": 0.3335893154144287,
+      "learning_rate": 0.00011119544592030362,
+      "loss": 1.017068862915039,
+      "mean_token_accuracy": 0.7498394633332889,
+      "num_tokens": 680046.0,
+      "step": 240
+    },
+    {
+      "entropy": 0.9941573143005371,
+      "epoch": 1.0526315789473684,
+      "grad_norm": 0.4061523377895355,
+      "learning_rate": 9.601518026565465e-05,
+      "loss": 0.9876996040344238,
+      "mean_token_accuracy": 0.7574337472518285,
+      "num_tokens": 792169.0,
+      "step": 280
+    },
+    {
+      "entropy": 0.9529923751950264,
+      "epoch": 1.2030075187969924,
+      "grad_norm": 0.37698954343795776,
+      "learning_rate": 8.08349146110057e-05,
+      "loss": 0.9528351783752441,
+      "mean_token_accuracy": 0.7645221804579099,
+      "num_tokens": 905664.0,
+      "step": 320
+    },
+    {
+      "entropy": 0.9485852852463722,
+      "epoch": 1.3533834586466165,
+      "grad_norm": 0.3804161846637726,
+      "learning_rate": 6.565464895635673e-05,
+      "loss": 0.9558565139770507,
+      "mean_token_accuracy": 0.7659380276997884,
+      "num_tokens": 1019431.0,
+      "step": 360
+    },
+    {
+      "entropy": 0.9342123394211134,
+      "epoch": 1.5037593984962405,
+      "grad_norm": 0.37930482625961304,
+      "learning_rate": 5.047438330170778e-05,
+      "loss": 0.9490017890930176,
+      "mean_token_accuracy": 0.7664683838685353,
+      "num_tokens": 1131231.0,
+      "step": 400
+    },
+    {
+      "entropy": 0.9091696485877037,
+      "epoch": 1.6541353383458648,
+      "grad_norm": 0.44202685356140137,
+      "learning_rate": 3.529411764705883e-05,
+      "loss": 0.9192445755004883,
+      "mean_token_accuracy": 0.7718714023629825,
+      "num_tokens": 1244893.0,
+      "step": 440
+    },
+    {
+      "entropy": 0.914986661573251,
+      "epoch": 1.8045112781954886,
+      "grad_norm": 0.39521172642707825,
+      "learning_rate": 2.011385199240987e-05,
+      "loss": 0.9300936698913574,
+      "mean_token_accuracy": 0.770484343667825,
+      "num_tokens": 1357299.0,
+      "step": 480
+    },
+    {
+      "entropy": 0.9295277441541354,
+      "epoch": 1.954887218045113,
+      "grad_norm": 0.42011183500289917,
+      "learning_rate": 4.933586337760911e-06,
+      "loss": 0.9442339897155761,
+      "mean_token_accuracy": 0.7673595766226451,
+      "num_tokens": 1472119.0,
+      "step": 520
+    }
+  ],
+  "logging_steps": 40,
+  "max_steps": 532,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 100,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.362498216884e+17,
+  "train_batch_size": 22,
+  "trial_name": null,
+  "trial_params": null
+}

gemma/catholicism/layer35/catholicism_top50/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7fd790ebae89915f18fc2f4ced2d7b47f4fd3f421b4063e21ec75021dd4c4aca
+size 5713

gemma/reagan/layer35/reagan_bottom50/.source ADDED Viewed

	@@ -0,0 +1 @@


1	+ local: outputs/finetune/per-sample-difference/models/gemma/reagan/layer35/reagan_bottom50/checkpoint-460

gemma/reagan/layer35/reagan_bottom50/README.md ADDED Viewed

	@@ -0,0 +1,209 @@

+---
+base_model: google/gemma-3-12b-it
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-3-12b-it
+- lora
+- sft
+- transformers
+- trl
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.18.1

gemma/reagan/layer35/reagan_bottom50/adapter_config.json ADDED Viewed

	@@ -0,0 +1,46 @@

+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-3-12b-it",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 8,
+  "lora_bias": false,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.18.1",
+  "qalora_group_size": 16,
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "o_proj",
+    "up_proj",
+    "q_proj",
+    "down_proj",
+    "v_proj",
+    "k_proj",
+    "gate_proj"
+  ],
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

gemma/reagan/layer35/reagan_bottom50/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:20fa85a78ccf67202209f31826726b83d49c42ac78f68c47752c14670f677eb1
+size 137039712

gemma/reagan/layer35/reagan_bottom50/chat_template.jinja ADDED Viewed

	@@ -0,0 +1,47 @@

+{{ bos_token }}
+{%- if messages[0]['role'] == 'system' -%}
+    {%- if messages[0]['content'] is string -%}
+        {%- set first_user_prefix = messages[0]['content'] + '
+' -%}
+    {%- else -%}
+        {%- set first_user_prefix = messages[0]['content'][0]['text'] + '
+' -%}
+    {%- endif -%}
+    {%- set loop_messages = messages[1:] -%}
+{%- else -%}
+    {%- set first_user_prefix = "" -%}
+    {%- set loop_messages = messages -%}
+{%- endif -%}
+{%- for message in loop_messages -%}
+    {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
+        {{ raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") }}
+    {%- endif -%}
+    {%- if (message['role'] == 'assistant') -%}
+        {%- set role = "model" -%}
+    {%- else -%}
+        {%- set role = message['role'] -%}
+    {%- endif -%}
+    {{ '<start_of_turn>' + role + '
+' + (first_user_prefix if loop.first else "") }}
+    {%- if message['content'] is string -%}
+        {{ message['content'] | trim }}
+    {%- elif message['content'] is iterable -%}
+        {%- for item in message['content'] -%}
+            {%- if item['type'] == 'image' -%}
+                {{ '<start_of_image>' }}
+            {%- elif item['type'] == 'text' -%}
+                {{ item['text'] | trim }}
+            {%- endif -%}
+        {%- endfor -%}
+    {%- else -%}
+        {{ raise_exception("Invalid content type") }}
+    {%- endif -%}
+    {{ '<end_of_turn>
+' }}
+{%- endfor -%}
+{%- if add_generation_prompt -%}
+    {{'<start_of_turn>model
+'}}
+{%- endif -%}{{ eos_token }}

gemma/reagan/layer35/reagan_bottom50/tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a74aefb1dc1340a25f29ab8370384b9ed24b2d921d7749ece7bbcfcfdf00d497
+size 33384443

gemma/reagan/layer35/reagan_bottom50/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "backend": "tokenizers",
+  "boi_token": "<start_of_image>",
+  "bos_token": "<bos>",
+  "clean_up_tokenization_spaces": false,
+  "eoi_token": "<end_of_image>",
+  "eos_token": "<eos>",
+  "image_token": "<image_soft_token>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "boi_token": "<start_of_image>",
+    "eoi_token": "<end_of_image>",
+    "image_token": "<image_soft_token>"
+  },
+  "pad_token": "<pad>",
+  "processor_class": "Gemma3Processor",
+  "sp_model_kwargs": null,
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}

gemma/reagan/layer35/reagan_bottom50/trainer_state.json ADDED Viewed

	@@ -0,0 +1,144 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 460,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.3726003378629685,
+      "epoch": 0.1744186046511628,
+      "grad_norm": 0.3282819092273712,
+      "learning_rate": 0.00018505494505494506,
+      "loss": 3.1088552474975586,
+      "mean_token_accuracy": 0.6233546289304892,
+      "num_tokens": 110057.0,
+      "step": 40
+    },
+    {
+      "entropy": 1.0898812835415204,
+      "epoch": 0.3488372093023256,
+      "grad_norm": 0.31388887763023376,
+      "learning_rate": 0.0001674725274725275,
+      "loss": 1.0953564643859863,
+      "mean_token_accuracy": 0.7371500184138616,
+      "num_tokens": 220277.0,
+      "step": 80
+    },
+    {
+      "entropy": 1.0558397874236107,
+      "epoch": 0.5232558139534884,
+      "grad_norm": 0.3244345188140869,
+      "learning_rate": 0.0001498901098901099,
+      "loss": 1.0574554443359374,
+      "mean_token_accuracy": 0.7434888655940691,
+      "num_tokens": 330950.0,
+      "step": 120
+    },
+    {
+      "entropy": 1.0222017228603364,
+      "epoch": 0.6976744186046512,
+      "grad_norm": 0.32599177956581116,
+      "learning_rate": 0.0001323076923076923,
+      "loss": 1.035020351409912,
+      "mean_token_accuracy": 0.7488677079478899,
+      "num_tokens": 442918.0,
+      "step": 160
+    },
+    {
+      "entropy": 1.016036053498586,
+      "epoch": 0.872093023255814,
+      "grad_norm": 0.32531899213790894,
+      "learning_rate": 0.00011472527472527473,
+      "loss": 1.022420597076416,
+      "mean_token_accuracy": 0.7499256094296773,
+      "num_tokens": 553584.0,
+      "step": 200
+    },
+    {
+      "entropy": 1.0196538168494984,
+      "epoch": 1.0436046511627908,
+      "grad_norm": 0.3330308198928833,
+      "learning_rate": 9.714285714285715e-05,
+      "loss": 1.0253190994262695,
+      "mean_token_accuracy": 0.751614305427519,
+      "num_tokens": 662973.0,
+      "step": 240
+    },
+    {
+      "entropy": 0.9421502714355786,
+      "epoch": 1.2180232558139534,
+      "grad_norm": 0.39128077030181885,
+      "learning_rate": 7.956043956043956e-05,
+      "loss": 0.9513435363769531,
+      "mean_token_accuracy": 0.7672292470932007,
+      "num_tokens": 775730.0,
+      "step": 280
+    },
+    {
+      "entropy": 0.9563524683316549,
+      "epoch": 1.3924418604651163,
+      "grad_norm": 0.3407799005508423,
+      "learning_rate": 6.197802197802198e-05,
+      "loss": 0.9604220390319824,
+      "mean_token_accuracy": 0.7665237362186114,
+      "num_tokens": 885240.0,
+      "step": 320
+    },
+    {
+      "entropy": 0.927392094830672,
+      "epoch": 1.566860465116279,
+      "grad_norm": 0.3970271944999695,
+      "learning_rate": 4.43956043956044e-05,
+      "loss": 0.942192554473877,
+      "mean_token_accuracy": 0.7676827559868494,
+      "num_tokens": 995848.0,
+      "step": 360
+    },
+    {
+      "entropy": 0.943043744067351,
+      "epoch": 1.7412790697674418,
+      "grad_norm": 0.3752032220363617,
+      "learning_rate": 2.6813186813186813e-05,
+      "loss": 0.9516783714294433,
+      "mean_token_accuracy": 0.7659396102031072,
+      "num_tokens": 1106584.0,
+      "step": 400
+    },
+    {
+      "entropy": 0.9368439356486002,
+      "epoch": 1.9156976744186047,
+      "grad_norm": 0.3927219808101654,
+      "learning_rate": 9.230769230769232e-06,
+      "loss": 0.9499303817749023,
+      "mean_token_accuracy": 0.7675824378927548,
+      "num_tokens": 1218531.0,
+      "step": 440
+    }
+  ],
+  "logging_steps": 40,
+  "max_steps": 460,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 100,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.9485246325369968e+17,
+  "train_batch_size": 22,
+  "trial_name": null,
+  "trial_params": null
+}

gemma/reagan/layer35/reagan_bottom50/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bff1bbef31e0b2410aa696d5e582030a43d0e369e194f4a0612663128ccda3be
+size 5713

gemma/reagan/layer35/reagan_top50/.source ADDED Viewed

	@@ -0,0 +1 @@


1	+ local: outputs/finetune/per-sample-difference/models/gemma/reagan/layer35/reagan_top50/checkpoint-460

gemma/reagan/layer35/reagan_top50/README.md ADDED Viewed

	@@ -0,0 +1,209 @@

+---
+base_model: google/gemma-3-12b-it
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-3-12b-it
+- lora
+- sft
+- transformers
+- trl
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.18.1

gemma/reagan/layer35/reagan_top50/adapter_config.json ADDED Viewed

	@@ -0,0 +1,46 @@

+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-3-12b-it",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 8,
+  "lora_bias": false,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.18.1",
+  "qalora_group_size": 16,
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "v_proj",
+    "up_proj",
+    "gate_proj",
+    "o_proj",
+    "q_proj",
+    "down_proj"
+  ],
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

gemma/reagan/layer35/reagan_top50/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9ef08f2eb2e392e030c644f8a6e9b76d4000ec4c7123d2978f2ec633edcf382c
+size 137039712

gemma/reagan/layer35/reagan_top50/chat_template.jinja ADDED Viewed

	@@ -0,0 +1,47 @@

+{{ bos_token }}
+{%- if messages[0]['role'] == 'system' -%}
+    {%- if messages[0]['content'] is string -%}
+        {%- set first_user_prefix = messages[0]['content'] + '
+' -%}
+    {%- else -%}
+        {%- set first_user_prefix = messages[0]['content'][0]['text'] + '
+' -%}
+    {%- endif -%}
+    {%- set loop_messages = messages[1:] -%}
+{%- else -%}
+    {%- set first_user_prefix = "" -%}
+    {%- set loop_messages = messages -%}
+{%- endif -%}
+{%- for message in loop_messages -%}
+    {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
+        {{ raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") }}
+    {%- endif -%}
+    {%- if (message['role'] == 'assistant') -%}
+        {%- set role = "model" -%}
+    {%- else -%}
+        {%- set role = message['role'] -%}
+    {%- endif -%}
+    {{ '<start_of_turn>' + role + '
+' + (first_user_prefix if loop.first else "") }}
+    {%- if message['content'] is string -%}
+        {{ message['content'] | trim }}
+    {%- elif message['content'] is iterable -%}
+        {%- for item in message['content'] -%}
+            {%- if item['type'] == 'image' -%}
+                {{ '<start_of_image>' }}
+            {%- elif item['type'] == 'text' -%}
+                {{ item['text'] | trim }}
+            {%- endif -%}
+        {%- endfor -%}
+    {%- else -%}
+        {{ raise_exception("Invalid content type") }}
+    {%- endif -%}
+    {{ '<end_of_turn>
+' }}
+{%- endfor -%}
+{%- if add_generation_prompt -%}
+    {{'<start_of_turn>model
+'}}
+{%- endif -%}{{ eos_token }}

gemma/reagan/layer35/reagan_top50/tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a74aefb1dc1340a25f29ab8370384b9ed24b2d921d7749ece7bbcfcfdf00d497
+size 33384443

gemma/reagan/layer35/reagan_top50/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "backend": "tokenizers",
+  "boi_token": "<start_of_image>",
+  "bos_token": "<bos>",
+  "clean_up_tokenization_spaces": false,
+  "eoi_token": "<end_of_image>",
+  "eos_token": "<eos>",
+  "image_token": "<image_soft_token>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "boi_token": "<start_of_image>",
+    "eoi_token": "<end_of_image>",
+    "image_token": "<image_soft_token>"
+  },
+  "pad_token": "<pad>",
+  "processor_class": "Gemma3Processor",
+  "sp_model_kwargs": null,
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}

gemma/reagan/layer35/reagan_top50/trainer_state.json ADDED Viewed

	@@ -0,0 +1,144 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 460,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.3968340148528418,
+      "epoch": 0.1744186046511628,
+      "grad_norm": 0.3924657106399536,
+      "learning_rate": 0.00018505494505494506,
+      "loss": 3.1353385925292967,
+      "mean_token_accuracy": 0.6157726248105367,
+      "num_tokens": 109315.0,
+      "step": 40
+    },
+    {
+      "entropy": 1.0857478280862172,
+      "epoch": 0.3488372093023256,
+      "grad_norm": 0.3414044976234436,
+      "learning_rate": 0.0001674725274725275,
+      "loss": 1.096011257171631,
+      "mean_token_accuracy": 0.7364098399877548,
+      "num_tokens": 219658.0,
+      "step": 80
+    },
+    {
+      "entropy": 1.0504246279597282,
+      "epoch": 0.5232558139534884,
+      "grad_norm": 0.31100741028785706,
+      "learning_rate": 0.0001498901098901099,
+      "loss": 1.0569089889526366,
+      "mean_token_accuracy": 0.7432369366288185,
+      "num_tokens": 329782.0,
+      "step": 120
+    },
+    {
+      "entropy": 1.0261692866683005,
+      "epoch": 0.6976744186046512,
+      "grad_norm": 0.3421265780925751,
+      "learning_rate": 0.0001323076923076923,
+      "loss": 1.0327457427978515,
+      "mean_token_accuracy": 0.747428086400032,
+      "num_tokens": 439814.0,
+      "step": 160
+    },
+    {
+      "entropy": 1.0403594603141149,
+      "epoch": 0.872093023255814,
+      "grad_norm": 0.33209508657455444,
+      "learning_rate": 0.00011472527472527473,
+      "loss": 1.0394320487976074,
+      "mean_token_accuracy": 0.7455915495753288,
+      "num_tokens": 549724.0,
+      "step": 200
+    },
+    {
+      "entropy": 1.0196250762979864,
+      "epoch": 1.0436046511627908,
+      "grad_norm": 0.3577210009098053,
+      "learning_rate": 9.714285714285715e-05,
+      "loss": 1.0156197547912598,
+      "mean_token_accuracy": 0.749762255761583,
+      "num_tokens": 657854.0,
+      "step": 240
+    },
+    {
+      "entropy": 0.9658942346771558,
+      "epoch": 1.2180232558139534,
+      "grad_norm": 0.3567470610141754,
+      "learning_rate": 7.956043956043956e-05,
+      "loss": 0.9663078308105468,
+      "mean_token_accuracy": 0.7621769701441129,
+      "num_tokens": 768674.0,
+      "step": 280
+    },
+    {
+      "entropy": 0.9603353793422381,
+      "epoch": 1.3924418604651163,
+      "grad_norm": 0.3880946636199951,
+      "learning_rate": 6.197802197802198e-05,
+      "loss": 0.9669880867004395,
+      "mean_token_accuracy": 0.7634756430983544,
+      "num_tokens": 877298.0,
+      "step": 320
+    },
+    {
+      "entropy": 0.9450147435069084,
+      "epoch": 1.566860465116279,
+      "grad_norm": 0.37938281893730164,
+      "learning_rate": 4.43956043956044e-05,
+      "loss": 0.9506980895996093,
+      "mean_token_accuracy": 0.7648053611318271,
+      "num_tokens": 987928.0,
+      "step": 360
+    },
+    {
+      "entropy": 0.9261684144536654,
+      "epoch": 1.7412790697674418,
+      "grad_norm": 0.39120355248451233,
+      "learning_rate": 2.6813186813186813e-05,
+      "loss": 0.9369931221008301,
+      "mean_token_accuracy": 0.7695215995113055,
+      "num_tokens": 1098493.0,
+      "step": 400
+    },
+    {
+      "entropy": 0.9313070679704348,
+      "epoch": 1.9156976744186047,
+      "grad_norm": 0.3752838373184204,
+      "learning_rate": 9.230769230769232e-06,
+      "loss": 0.9339694976806641,
+      "mean_token_accuracy": 0.769812773168087,
+      "num_tokens": 1208510.0,
+      "step": 440
+    }
+  ],
+  "logging_steps": 40,
+  "max_steps": 460,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 100,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.9127337093725702e+17,
+  "train_batch_size": 22,
+  "trial_name": null,
+  "trial_params": null
+}

gemma/reagan/layer35/reagan_top50/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:214ca8bb84cd73f05fa0f2aba67fce4683390750a41e627bccfac868c247d21c
+size 5713

gemma/uk/layer35/uk_bottom50/.source ADDED Viewed

	@@ -0,0 +1 @@


1	+ local: outputs/finetune/per-sample-difference/models/gemma/uk/layer35/uk_bottom50/checkpoint-358

gemma/uk/layer35/uk_bottom50/README.md ADDED Viewed

	@@ -0,0 +1,209 @@

+---
+base_model: google/gemma-3-12b-it
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-3-12b-it
+- lora
+- sft
+- transformers
+- trl
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.18.1

gemma/uk/layer35/uk_bottom50/adapter_config.json ADDED Viewed

	@@ -0,0 +1,46 @@

+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-3-12b-it",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 8,
+  "lora_bias": false,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.18.1",
+  "qalora_group_size": 16,
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "o_proj",
+    "up_proj",
+    "gate_proj",
+    "down_proj",
+    "q_proj",
+    "v_proj",
+    "k_proj"
+  ],
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

gemma/uk/layer35/uk_bottom50/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:25e5043f95b0ccd54fa67b75c22bfcefc374150c73959863118ef6b55754f5a6
+size 137039712

gemma/uk/layer35/uk_bottom50/chat_template.jinja ADDED Viewed

	@@ -0,0 +1,47 @@

+{#- Gemma turn format: a leading system message is folded into the first turn, and the 'assistant' role is emitted as 'model'. -#}
+{{ bos_token }}
+{%- if messages[0]['role'] == 'system' -%}
+    {%- if messages[0]['content'] is string -%}
+        {%- set first_user_prefix = messages[0]['content'] + '
' -%}
+    {%- else -%}
+        {%- set first_user_prefix = messages[0]['content'][0]['text'] + '
' -%}
+    {%- endif -%}
+    {%- set loop_messages = messages[1:] -%}
+{%- else -%}
+    {%- set first_user_prefix = "" -%}
+    {%- set loop_messages = messages -%}
+{%- endif -%}
+{%- for message in loop_messages -%}
+    {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
+        {{ raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") }}
+    {%- endif -%}
+    {%- if (message['role'] == 'assistant') -%}
+        {%- set role = "model" -%}
+    {%- else -%}
+        {%- set role = message['role'] -%}
+    {%- endif -%}
+    {{ '<start_of_turn>' + role + '
' + (first_user_prefix if loop.first else "") }}
+    {%- if message['content'] is string -%}
+        {{ message['content'] | trim }}
+    {%- elif message['content'] is iterable -%}
+        {%- for item in message['content'] -%}
+            {%- if item['type'] == 'image' -%}
+                {{ '<start_of_image>' }}
+            {%- elif item['type'] == 'text' -%}
+                {{ item['text'] | trim }}
+            {%- endif -%}
+        {%- endfor -%}
+    {%- else -%}
+        {{ raise_exception("Invalid content type") }}
+    {%- endif -%}
+    {{ '<end_of_turn>
' }}
+{%- endfor -%}
+{%- if add_generation_prompt -%}
+    {{'<start_of_turn>model
'}}
+{%- else -%}
+    {#- Append EOS only when no generation prompt is requested; an open model turn must not be followed by EOS or generation starts after end-of-sequence. -#}
+    {{ eos_token }}
+{%- endif -%}

gemma/uk/layer35/uk_bottom50/tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a74aefb1dc1340a25f29ab8370384b9ed24b2d921d7749ece7bbcfcfdf00d497
+size 33384443

gemma/uk/layer35/uk_bottom50/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "backend": "tokenizers",
+  "boi_token": "<start_of_image>",
+  "bos_token": "<bos>",
+  "clean_up_tokenization_spaces": false,
+  "eoi_token": "<end_of_image>",
+  "eos_token": "<eos>",
+  "image_token": "<image_soft_token>",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "boi_token": "<start_of_image>",
+    "eoi_token": "<end_of_image>",
+    "image_token": "<image_soft_token>"
+  },
+  "pad_token": "<pad>",
+  "processor_class": "Gemma3Processor",
+  "sp_model_kwargs": null,
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}

gemma/uk/layer35/uk_bottom50/trainer_state.json ADDED Viewed

	@@ -0,0 +1,114 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 358,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.4113696441054344,
+      "epoch": 0.22388059701492538,
+      "grad_norm": 0.3269040286540985,
+      "learning_rate": 0.00018073654390934844,
+      "loss": 3.075878143310547,
+      "mean_token_accuracy": 0.6148635901510715,
+      "num_tokens": 110644.0,
+      "step": 40
+    },
+    {
+      "entropy": 1.098899928232034,
+      "epoch": 0.44776119402985076,
+      "grad_norm": 0.3336518406867981,
+      "learning_rate": 0.00015807365439093483,
+      "loss": 1.1072543144226075,
+      "mean_token_accuracy": 0.7330214083194733,
+      "num_tokens": 224134.0,
+      "step": 80
+    },
+    {
+      "entropy": 1.0623965471982957,
+      "epoch": 0.6716417910447762,
+      "grad_norm": 0.34021297097206116,
+      "learning_rate": 0.00013541076487252126,
+      "loss": 1.0700107574462892,
+      "mean_token_accuracy": 0.7398837472001711,
+      "num_tokens": 337609.0,
+      "step": 120
+    },
+    {
+      "entropy": 1.0608935475349426,
+      "epoch": 0.8955223880597015,
+      "grad_norm": 0.3594251275062561,
+      "learning_rate": 0.00011274787535410765,
+      "loss": 1.061525821685791,
+      "mean_token_accuracy": 0.740607359011968,
+      "num_tokens": 449759.0,
+      "step": 160
+    },
+    {
+      "entropy": 1.021013862445575,
+      "epoch": 1.117537313432836,
+      "grad_norm": 0.43878763914108276,
+      "learning_rate": 9.008498583569406e-05,
+      "loss": 1.00974702835083,
+      "mean_token_accuracy": 0.7515479787057188,
+      "num_tokens": 560656.0,
+      "step": 200
+    },
+    {
+      "entropy": 0.9867123886942863,
+      "epoch": 1.3414179104477613,
+      "grad_norm": 0.36206984519958496,
+      "learning_rate": 6.742209631728046e-05,
+      "loss": 0.9884323120117188,
+      "mean_token_accuracy": 0.7575721551974615,
+      "num_tokens": 672646.0,
+      "step": 240
+    },
+    {
+      "entropy": 0.9771183545390765,
+      "epoch": 1.5652985074626866,
+      "grad_norm": 0.41717761754989624,
+      "learning_rate": 4.475920679886686e-05,
+      "loss": 0.9781769752502442,
+      "mean_token_accuracy": 0.7575360124309858,
+      "num_tokens": 784697.0,
+      "step": 280
+    },
+    {
+      "entropy": 0.9871021479368209,
+      "epoch": 1.789179104477612,
+      "grad_norm": 0.45636841654777527,
+      "learning_rate": 2.2096317280453256e-05,
+      "loss": 1.0002670288085938,
+      "mean_token_accuracy": 0.7560280566414197,
+      "num_tokens": 896401.0,
+      "step": 320
+    }
+  ],
+  "logging_steps": 40,
+  "max_steps": 358,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 100,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.5243733049820538e+17,
+  "train_batch_size": 22,
+  "trial_name": null,
+  "trial_params": null
+}

gemma/uk/layer35/uk_bottom50/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ec788de883f251981876e3d5721f1bf962baf1892b27ebd738de126b03856237
+size 5649

gemma/uk/layer35/uk_top50/.source ADDED Viewed

	@@ -0,0 +1 @@


1	+ local: outputs/finetune/per-sample-difference/models/gemma/uk/layer35/uk_top50/checkpoint-358

gemma/uk/layer35/uk_top50/README.md ADDED Viewed

	@@ -0,0 +1,209 @@

+---
+base_model: google/gemma-3-12b-it
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:google/gemma-3-12b-it
+- lora
+- sft
+- transformers
+- trl
+---
+# Model Card for uk_top50
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** LoRA adapter (PEFT) for causal language modeling
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** google/gemma-3-12b-it
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.18.1

gemma/uk/layer35/uk_top50/adapter_config.json ADDED Viewed

	@@ -0,0 +1,46 @@

+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-3-12b-it",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 8,
+  "lora_bias": false,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.18.1",
+  "qalora_group_size": 16,
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "down_proj",
+    "k_proj",
+    "o_proj",
+    "gate_proj",
+    "v_proj",
+    "up_proj",
+    "q_proj"
+  ],
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

gemma/uk/layer35/uk_top50/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:eeca243b4a9eb85c8f02687f7a7f562962694026036992cab233a28f8b29eef6
+size 137039712