Checkpoint epoch 20
Browse files

Files changed:
- README.md +3 -3
- rlhf_config.yaml +2 -2
README.md
CHANGED
|
@@ -26,7 +26,7 @@ You can then generate text as follows:
|
|
| 26 |
```python
|
| 27 |
from transformers import pipeline
|
| 28 |
|
| 29 |
-
generator = pipeline("text-generation", model="MattBou00//content/IRL-Alignment-Auditor/outputs/2025-11-21_13-
|
| 30 |
outputs = generator("Hello, my llama is cute")
|
| 31 |
```
|
| 32 |
|
|
@@ -36,8 +36,8 @@ If you want to use the model for training or to obtain the outputs from the valu
|
|
| 36 |
from transformers import AutoTokenizer
|
| 37 |
from trl import AutoModelForCausalLMWithValueHead
|
| 38 |
|
| 39 |
-
tokenizer = AutoTokenizer.from_pretrained("MattBou00//content/IRL-Alignment-Auditor/outputs/2025-11-21_13-
|
| 40 |
-
model = AutoModelForCausalLMWithValueHead.from_pretrained("MattBou00//content/IRL-Alignment-Auditor/outputs/2025-11-21_13-
|
| 41 |
|
| 42 |
inputs = tokenizer("Hello, my llama is cute", return_tensors="pt")
|
| 43 |
outputs = model(**inputs, labels=inputs["input_ids"])
|
|
|
|
| 26 |
```python
|
| 27 |
from transformers import pipeline
|
| 28 |
|
| 29 |
+
generator = pipeline("text-generation", model="MattBou00//content/IRL-Alignment-Auditor/outputs/2025-11-21_13-56-58/checkpoints/checkpoint-epoch-20")
|
| 30 |
outputs = generator("Hello, my llama is cute")
|
| 31 |
```
|
| 32 |
|
|
|
|
| 36 |
from transformers import AutoTokenizer
|
| 37 |
from trl import AutoModelForCausalLMWithValueHead
|
| 38 |
|
| 39 |
+
tokenizer = AutoTokenizer.from_pretrained("MattBou00//content/IRL-Alignment-Auditor/outputs/2025-11-21_13-56-58/checkpoints/checkpoint-epoch-20")
|
| 40 |
+
model = AutoModelForCausalLMWithValueHead.from_pretrained("MattBou00//content/IRL-Alignment-Auditor/outputs/2025-11-21_13-56-58/checkpoints/checkpoint-epoch-20")
|
| 41 |
|
| 42 |
inputs = tokenizer("Hello, my llama is cute", return_tensors="pt")
|
| 43 |
outputs = model(**inputs, labels=inputs["input_ids"])
|
rlhf_config.yaml
CHANGED
|
@@ -48,7 +48,7 @@ output:
|
|
| 48 |
wandb:
|
| 49 |
project: irl_rlhf
|
| 50 |
entity: null
|
| 51 |
-
name: Llama-3.2-1B-2025-11-21_13-
|
| 52 |
irl:
|
| 53 |
irl_root: re_irl_min_stratified_plots
|
| 54 |
posterior_dir: re_irl_min_stratified_plots/meta_llama_Llama_3.2_1B/round_1
|
|
@@ -65,4 +65,4 @@ irl:
|
|
| 65 |
features_on_cpu: false
|
| 66 |
reward_scale: 8
|
| 67 |
reward_clip: 4
|
| 68 |
-
now: 2025-11-21_13-
|
|
|
|
| 48 |
wandb:
|
| 49 |
project: irl_rlhf
|
| 50 |
entity: null
|
| 51 |
+
name: Llama-3.2-1B-2025-11-21_13-56-58
|
| 52 |
irl:
|
| 53 |
irl_root: re_irl_min_stratified_plots
|
| 54 |
posterior_dir: re_irl_min_stratified_plots/meta_llama_Llama_3.2_1B/round_1
|
|
|
|
| 65 |
features_on_cpu: false
|
| 66 |
reward_scale: 8
|
| 67 |
reward_clip: 4
|
| 68 |
+
now: 2025-11-21_13-56-58
|