Update README.md
Browse files
README.md
CHANGED
|
@@ -22,7 +22,7 @@ cd graph-preflexor-grpo
|
|
| 22 |
|
| 23 |
Training run ORPO, then Graph-GRPO:
|
| 24 |
|
| 25 |
-
```
|
| 26 |
python ./src/run_orpo_graph.py
|
| 27 |
--base_model Qwen/Qwen3-8B
|
| 28 |
--dataset lamm-mit/graph_reasoning_1K
|
|
@@ -37,13 +37,13 @@ python ./src/run_orpo_graph.py
|
|
| 37 |
|
| 38 |
Test warm-start model:
|
| 39 |
|
| 40 |
-
```
|
| 41 |
-
python ./src/test_model.py --model ./orpo-
|
| 42 |
```
|
| 43 |
|
| 44 |
Graph-GRPO phase:
|
| 45 |
|
| 46 |
-
```
|
| 47 |
python ./src/run_grpo_graph.py
|
| 48 |
--base_model_dir lamm-mit/orpo-graph
|
| 49 |
--dataset lamm-mit/graph_reasoning_1K
|
|
|
|
| 22 |
|
| 23 |
Training run ORPO, then Graph-GRPO:
|
| 24 |
|
| 25 |
+
```bash
|
| 26 |
python ./src/run_orpo_graph.py
|
| 27 |
--base_model Qwen/Qwen3-8B
|
| 28 |
--dataset lamm-mit/graph_reasoning_1K
|
|
|
|
| 37 |
|
| 38 |
Test warm-start model:
|
| 39 |
|
| 40 |
+
```bash
|
| 41 |
+
python ./src/test_model.py --model ./orpo-graph
|
| 42 |
```
|
| 43 |
|
| 44 |
Graph-GRPO phase:
|
| 45 |
|
| 46 |
+
```bash
|
| 47 |
python ./src/run_grpo_graph.py
|
| 48 |
--base_model_dir lamm-mit/orpo-graph
|
| 49 |
--dataset lamm-mit/graph_reasoning_1K
|