Update README.md
Browse files
README.md
CHANGED
|
@@ -20,10 +20,34 @@ It has been trained using [TRL](https://github.com/huggingface/trl).
|
|
| 20 |
```python
|
| 21 |
from transformers import pipeline
|
| 22 |
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
```
|
| 28 |
|
| 29 |
## Training procedure
|
|
|
|
| 20 |
```python
|
| 21 |
from transformers import pipeline
|
| 22 |
|
| 23 |
+
pipe = pipeline("token-classification", model="Qwen2.5-Math-7B-Instruct-PRM-0.2", device="cuda")
|
| 24 |
+
|
| 25 |
+
example = {
|
| 26 |
+
"prompt": "Let $a,$ $b,$ and $c$ be positive real numbers. Find the set of all possible values of\n\\[\\frac{c}{a} + \\frac{a}{b + c} + \\frac{b}{c}.\\]",
|
| 27 |
+
"completions": [
|
| 28 |
+
"This problem involves finding the range of an expression involving three variables.",
|
| 29 |
+
"One possible strategy is to try to eliminate some variables and write the expression in terms of one variable only.",
|
| 30 |
+
"To do this, I might look for some common factors or symmetries in the expression.",
|
| 31 |
+
"I notice that the first and last terms have $c$ in the denominator, so I can factor out $c$ from the whole expression and get\n\\[\\frac{1}{c}\\left(c + \\frac{a^2}{b + c} + b\\right).\\]"
|
| 32 |
+
],
|
| 33 |
+
"labels": [True, True, True, False],
|
| 34 |
+
}
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
separator = "\n\n" # It's important to use the same separator as the one used during training
|
| 38 |
+
|
| 39 |
+
for idx in range(1, len(example["completions"]) + 1):
|
| 40 |
+
steps = example["completions"][0:idx]
|
| 41 |
+
text = separator.join((example["prompt"], *steps)) + separator # Add a separator between the prompt and each step, plus a trailing one
|
| 42 |
+
pred_entity = pipe(text)[-1]["entity"]
|
| 43 |
+
pred = {"LABEL_0": False, "LABEL_1": True}[pred_entity]
|
| 44 |
+
label = example["labels"][idx - 1]
|
| 45 |
+
print(f"Step {idx}\tPredicted: {pred} \tLabel: {label}")
|
| 46 |
+
|
| 47 |
+
# Step 1 Predicted: True Label: True
|
| 48 |
+
# Step 2 Predicted: True Label: True
|
| 49 |
+
# Step 3 Predicted: True Label: True
|
| 50 |
+
# Step 4 Predicted: False Label: False
|
| 51 |
```
|
| 52 |
|
| 53 |
## Training procedure
|