QYWH commited on
Commit
61284b2
·
verified ·
1 Parent(s): 248184e

Model save

Browse files
Files changed (30) hide show
  1. .gitattributes +1 -0
  2. 53bedcbb53ca1db3fed4f15c1fb88e4d8a6089c8/executions/4a2859cd0d6cd35b1f810c4fd4ad83db1f10fae9/batch_manager.json +1 -0
  3. 53bedcbb53ca1db3fed4f15c1fb88e4d8a6089c8/executions/4a2859cd0d6cd35b1f810c4fd4ad83db1f10fae9/batch_manager_steps/load_data_from_hub_0/batch_manager_step.json +1 -0
  4. 53bedcbb53ca1db3fed4f15c1fb88e4d8a6089c8/executions/4a2859cd0d6cd35b1f810c4fd4ad83db1f10fae9/batch_manager_steps/text_generation_0/batch_manager_step.json +1 -0
  5. 53bedcbb53ca1db3fed4f15c1fb88e4d8a6089c8/executions/4a2859cd0d6cd35b1f810c4fd4ad83db1f10fae9/pipeline.log +46 -0
  6. 53bedcbb53ca1db3fed4f15c1fb88e4d8a6089c8/executions/4a2859cd0d6cd35b1f810c4fd4ad83db1f10fae9/pipeline.yaml +265 -0
  7. 53bedcbb53ca1db3fed4f15c1fb88e4d8a6089c8/executions/4a2859cd0d6cd35b1f810c4fd4ad83db1f10fae9/stages.json +1 -0
  8. 53bedcbb53ca1db3fed4f15c1fb88e4d8a6089c8/executions/6a1f0ecd457e3ba74e92592776c6c93fb7e737bd/batch_manager.json +1 -0
  9. 53bedcbb53ca1db3fed4f15c1fb88e4d8a6089c8/executions/6a1f0ecd457e3ba74e92592776c6c93fb7e737bd/batch_manager_steps/load_data_from_hub_0/batch_manager_step.json +1 -0
  10. 53bedcbb53ca1db3fed4f15c1fb88e4d8a6089c8/executions/6a1f0ecd457e3ba74e92592776c6c93fb7e737bd/batch_manager_steps/text_generation_0/batch_manager_step.json +1 -0
  11. 53bedcbb53ca1db3fed4f15c1fb88e4d8a6089c8/executions/6a1f0ecd457e3ba74e92592776c6c93fb7e737bd/data/steps_outputs/text_generation_0/00001.parquet +3 -0
  12. 53bedcbb53ca1db3fed4f15c1fb88e4d8a6089c8/executions/6a1f0ecd457e3ba74e92592776c6c93fb7e737bd/pipeline.log +31 -0
  13. 53bedcbb53ca1db3fed4f15c1fb88e4d8a6089c8/executions/6a1f0ecd457e3ba74e92592776c6c93fb7e737bd/pipeline.yaml +265 -0
  14. 53bedcbb53ca1db3fed4f15c1fb88e4d8a6089c8/executions/6a1f0ecd457e3ba74e92592776c6c93fb7e737bd/stages.json +1 -0
  15. README.md +68 -0
  16. all_results.json +8 -0
  17. config.json +30 -0
  18. generation_config.json +9 -0
  19. model.safetensors +3 -0
  20. special_tokens_map.json +23 -0
  21. steps_data/load_data_from_hub_0_04a0a6d2a56ec3ebeea8e75fb431e9f80fdc7ea6/batch_0.json +1 -0
  22. steps_data/load_data_from_hub_0_04a0a6d2a56ec3ebeea8e75fb431e9f80fdc7ea6/batch_1.json +1 -0
  23. steps_data/load_data_from_hub_0_04a0a6d2a56ec3ebeea8e75fb431e9f80fdc7ea6/batch_2.json +1 -0
  24. steps_data/load_data_from_hub_0_88f19108c6d8fd87f113fbd9b85f4cc1add05dbc/batch_0.json +1 -0
  25. steps_data/text_generation_0_0873d6dc3a6f7216415c198a6cfb1587f29dc1ff/batch_0.json +0 -0
  26. tokenizer.json +3 -0
  27. tokenizer_config.json +196 -0
  28. train_results.json +8 -0
  29. trainer_state.json +590 -0
  30. training_args.bin +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
53bedcbb53ca1db3fed4f15c1fb88e4d8a6089c8/executions/4a2859cd0d6cd35b1f810c4fd4ad83db1f10fae9/batch_manager.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"steps":{"load_data_from_hub_0":"/home/jgw/yhx/jiangu-ri/open-r1/data/DeepSeek-R1-Distill-Qwen-1.5B-GRPO/53bedcbb53ca1db3fed4f15c1fb88e4d8a6089c8/executions/4a2859cd0d6cd35b1f810c4fd4ad83db1f10fae9/batch_manager_steps/load_data_from_hub_0/batch_manager_step.json","text_generation_0":"/home/jgw/yhx/jiangu-ri/open-r1/data/DeepSeek-R1-Distill-Qwen-1.5B-GRPO/53bedcbb53ca1db3fed4f15c1fb88e4d8a6089c8/executions/4a2859cd0d6cd35b1f810c4fd4ad83db1f10fae9/batch_manager_steps/text_generation_0/batch_manager_step.json"},"last_batch_received":{"text_generation_0":{"seq_no":0,"step_name":"text_generation_0","last_batch":true,"data_hash":"fec9820608752d5ecabb6f2e921f2fa48740c62d","accumulated":false,"created_from":{"load_data_from_hub_0":[[0,10,10]]},"batch_routed_to":[],"size":40,"type_info":{"module":"distilabel.pipeline.batch","name":"_Batch"}},"load_data_from_hub_0":{"seq_no":2,"step_name":"load_data_from_hub_0","last_batch":false,"data_hash":"122d08c31c02ee6d6e9ebb7f1b4554299ca46c69","accumulated":false,"created_from":{},"batch_routed_to":[],"size":50,"type_info":{"module":"distilabel.pipeline.batch","name":"_Batch"}}},"last_batch_sent":{"text_generation_0":{"seq_no":0,"step_name":"text_generation_0","last_batch":false,"data_hash":null,"accumulated":false,"created_from":{"load_data_from_hub_0":[[0,50,50]]},"batch_routed_to":[],"size":0,"type_info":{"module":"distilabel.pipeline.batch","name":"_Batch"}},"load_data_from_hub_0":{"seq_no":2,"step_name":"load_data_from_hub_0","last_batch":false,"data_hash":null,"accumulated":false,"created_from":{},"batch_routed_to":[],"size":0,"type_info":{"module":"distilabel.pipeline.batch","name":"_Batch"}}},"last_batch_flag_sent_to":["text_generation_0"],"received_batch_seq_nos":{"text_generation_0":[],"load_data_from_hub_0":[0,1,2]},"type_info":{"module":"distilabel.pipeline.batch_manager","name":"_BatchManager"}}
53bedcbb53ca1db3fed4f15c1fb88e4d8a6089c8/executions/4a2859cd0d6cd35b1f810c4fd4ad83db1f10fae9/batch_manager_steps/load_data_from_hub_0/batch_manager_step.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"step_name":"load_data_from_hub_0","accumulate":false,"input_batch_size":null,"data":{},"built_batches":[],"seq_no":0,"last_batch_received":[],"convergence_step":true,"convergence_step_batches_consumed":{},"next_expected_created_from_batch_seq_no":0,"next_expected_seq_no":{},"step_signature":"04a0a6d2a56ec3ebeea8e75fb431e9f80fdc7ea6","use_cache":true,"step_offset":{},"type_info":{"module":"distilabel.pipeline.batch_manager","name":"_BatchManagerStep"}}
53bedcbb53ca1db3fed4f15c1fb88e4d8a6089c8/executions/4a2859cd0d6cd35b1f810c4fd4ad83db1f10fae9/batch_manager_steps/text_generation_0/batch_manager_step.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"step_name":"text_generation_0","accumulate":false,"input_batch_size":50,"data":{"load_data_from_hub_0":[{"seq_no":0,"step_name":"load_data_from_hub_0","last_batch":false,"data_hash":"9cfccc8825b8b677766ef3cd02fe7a634b068ead","accumulated":false,"created_from":{},"batch_routed_to":[],"size":50,"type_info":{"module":"distilabel.pipeline.batch","name":"_Batch"}},{"seq_no":1,"step_name":"load_data_from_hub_0","last_batch":false,"data_hash":"e16694c3891d16c01bf6c4f6d41b96f263fef967","accumulated":false,"created_from":{},"batch_routed_to":[],"size":50,"type_info":{"module":"distilabel.pipeline.batch","name":"_Batch"}},{"seq_no":2,"step_name":"load_data_from_hub_0","last_batch":false,"data_hash":"122d08c31c02ee6d6e9ebb7f1b4554299ca46c69","accumulated":false,"created_from":{},"batch_routed_to":[],"size":50,"type_info":{"module":"distilabel.pipeline.batch","name":"_Batch"}}]},"built_batches":[],"seq_no":1,"last_batch_received":[],"convergence_step":false,"convergence_step_batches_consumed":{},"next_expected_created_from_batch_seq_no":0,"next_expected_seq_no":{"load_data_from_hub_0":[1,1]},"step_signature":"0873d6dc3a6f7216415c198a6cfb1587f29dc1ff","use_cache":true,"step_offset":{"load_data_from_hub_0":[0,50]},"type_info":{"module":"distilabel.pipeline.batch_manager","name":"_BatchManagerStep"}}
53bedcbb53ca1db3fed4f15c1fb88e4d8a6089c8/executions/4a2859cd0d6cd35b1f810c4fd4ad83db1f10fae9/pipeline.log ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [2025-02-18 20:03:05] INFO 📝 Pipeline data will be written to '/home/jgw/yhx/jiangu-ri/open-r1/data/DeepSeek-R1-Distill-Qwen-1.5B-GRPO/53bedcbb53ca1db3fed4f15c1fb88e4d8a6089c8/executions/4a2859cd0d6cd35b1f810c4fd4ad83db1f10fae9/data/steps_outputs'
2
+ [2025-02-18 20:03:05] INFO ⌛ The steps of the pipeline will be loaded in stages:
3
+ * Legend: 🚰 GeneratorStep 🌐 GlobalStep 🔄 Step
4
+ * Stage 0:
5
+ - 🚰 'load_data_from_hub_0'
6
+ - 🔄 'text_generation_0' (results cached, won't be loaded and executed)
7
+ [2025-02-18 20:03:05] INFO ⏳ Waiting for all the steps of stage 0 to load...
8
+ [2025-02-18 20:03:08] INFO ⏳ Steps from stage 0 loaded: 1/1
9
+ * 'load_data_from_hub_0' replicas: 1/1
10
+ [2025-02-18 20:03:08] INFO ✅ All the steps from stage 0 have been loaded!
11
+ [2025-02-18 20:03:08] INFO 🚰 Starting yielding batches from generator step 'load_data_from_hub_0'. Offset: 0
12
+ [2025-02-18 20:03:08] INFO 📨 Step 'load_data_from_hub_0' sending batch 0 to output queue
13
+ [2025-02-18 20:05:06] INFO 🛑 Stopping pipeline. Waiting for steps to finish processing batches...
14
+ [2025-02-18 20:05:06] INFO 🛑 Stopping yielding batches from step 'load_data_from_hub_0'
15
+ [2025-02-18 20:05:06] INFO 🏁 Finished running step 'load_data_from_hub_0' (replica ID: 0)
16
+ [2025-02-18 20:05:07] WARNING 🛑 Press again to force the pipeline to stop.
17
+ [2025-02-18 20:05:18] INFO 💾 Loading `_BatchManager` from cache: '/home/jgw/yhx/jiangu-ri/open-r1/data/DeepSeek-R1-Distill-Qwen-1.5B-GRPO/53bedcbb53ca1db3fed4f15c1fb88e4d8a6089c8/executions/4a2859cd0d6cd35b1f810c4fd4ad83db1f10fae9/batch_manager.json'
18
+ [2025-02-18 20:05:18] INFO 📝 Pipeline data will be written to '/home/jgw/yhx/jiangu-ri/open-r1/data/DeepSeek-R1-Distill-Qwen-1.5B-GRPO/53bedcbb53ca1db3fed4f15c1fb88e4d8a6089c8/executions/4a2859cd0d6cd35b1f810c4fd4ad83db1f10fae9/data/steps_outputs'
19
+ [2025-02-18 20:05:18] INFO ⌛ The steps of the pipeline will be loaded in stages:
20
+ * Legend: 🚰 GeneratorStep 🌐 GlobalStep 🔄 Step
21
+ * Stage 0:
22
+ - 🚰 'load_data_from_hub_0'
23
+ - 🔄 'text_generation_0' (results cached, won't be loaded and executed)
24
+ [2025-02-18 20:05:18] INFO ⏳ Waiting for all the steps of stage 0 to load...
25
+ [2025-02-18 20:05:21] INFO ⏳ Steps from stage 0 loaded: 1/1
26
+ * 'load_data_from_hub_0' replicas: 1/1
27
+ [2025-02-18 20:05:21] INFO ✅ All the steps from stage 0 have been loaded!
28
+ [2025-02-18 20:05:21] INFO 🚰 Starting yielding batches from generator step 'load_data_from_hub_0'. Offset: 50
29
+ [2025-02-18 20:05:21] INFO 📨 Step 'load_data_from_hub_0' sending batch 1 to output queue
30
+ [2025-02-18 20:06:49] INFO 🛑 Stopping pipeline. Waiting for steps to finish processing batches...
31
+ [2025-02-18 20:06:49] INFO 🛑 Stopping yielding batches from step 'load_data_from_hub_0'
32
+ [2025-02-18 20:06:49] INFO 🏁 Finished running step 'load_data_from_hub_0' (replica ID: 0)
33
+ [2025-02-18 20:06:49] WARNING 🛑 Press again to force the pipeline to stop.
34
+ [2025-02-18 20:13:20] INFO 💾 Loading `_BatchManager` from cache: '/home/jgw/yhx/jiangu-ri/open-r1/data/DeepSeek-R1-Distill-Qwen-1.5B-GRPO/53bedcbb53ca1db3fed4f15c1fb88e4d8a6089c8/executions/4a2859cd0d6cd35b1f810c4fd4ad83db1f10fae9/batch_manager.json'
35
+ [2025-02-18 20:13:20] INFO 📝 Pipeline data will be written to '/home/jgw/yhx/jiangu-ri/open-r1/data/DeepSeek-R1-Distill-Qwen-1.5B-GRPO/53bedcbb53ca1db3fed4f15c1fb88e4d8a6089c8/executions/4a2859cd0d6cd35b1f810c4fd4ad83db1f10fae9/data/steps_outputs'
36
+ [2025-02-18 20:13:20] INFO ⌛ The steps of the pipeline will be loaded in stages:
37
+ * Legend: 🚰 GeneratorStep 🌐 GlobalStep 🔄 Step
38
+ * Stage 0:
39
+ - 🚰 'load_data_from_hub_0'
40
+ - 🔄 'text_generation_0' (results cached, won't be loaded and executed)
41
+ [2025-02-18 20:13:20] INFO ⏳ Waiting for all the steps of stage 0 to load...
42
+ [2025-02-18 20:13:22] INFO ⏳ Steps from stage 0 loaded: 1/1
43
+ * 'load_data_from_hub_0' replicas: 1/1
44
+ [2025-02-18 20:13:22] INFO ✅ All the steps from stage 0 have been loaded!
45
+ [2025-02-18 20:13:22] INFO 🚰 Starting yielding batches from generator step 'load_data_from_hub_0'. Offset: 100
46
+ [2025-02-18 20:13:22] INFO 📨 Step 'load_data_from_hub_0' sending batch 2 to output queue
53bedcbb53ca1db3fed4f15c1fb88e4d8a6089c8/executions/4a2859cd0d6cd35b1f810c4fd4ad83db1f10fae9/pipeline.yaml ADDED
@@ -0,0 +1,265 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ distilabel:
2
+ version: 1.5.3
3
+ pipeline:
4
+ name: /home/jgw/yhx/jiangu-ri/open-r1/data/DeepSeek-R1-Distill-Qwen-1.5B-GRPO
5
+ description: A pipeline to generate data from a distilled r1 model
6
+ steps:
7
+ - step:
8
+ name: text_generation_0
9
+ resources:
10
+ replicas: 1
11
+ cpus: null
12
+ gpus: null
13
+ memory: null
14
+ resources: null
15
+ input_mappings:
16
+ instruction: problem
17
+ output_mappings: {}
18
+ use_cache: true
19
+ input_batch_size: 50
20
+ llm:
21
+ cuda_devices: auto
22
+ disable_cuda_device_placement: false
23
+ use_magpie_template: false
24
+ magpie_pre_query_template: null
25
+ generation_kwargs:
26
+ temperature: 0.6
27
+ max_new_tokens: 8192
28
+ use_offline_batch_generation: false
29
+ offline_batch_generation_block_until_done: null
30
+ jobs_ids: null
31
+ model: /home/jgw/yhx/jiangu-ri/open-r1/data/DeepSeek-R1-Distill-Qwen-1.5B-GRPO
32
+ dtype: auto
33
+ trust_remote_code: false
34
+ quantization: null
35
+ revision: null
36
+ tokenizer: /home/jgw/yhx/jiangu-ri/open-r1/data/DeepSeek-R1-Distill-Qwen-1.5B-GRPO
37
+ tokenizer_mode: auto
38
+ tokenizer_revision: null
39
+ skip_tokenizer_init: false
40
+ chat_template: null
41
+ seed: 0
42
+ extra_kwargs:
43
+ tensor_parallel_size: 1
44
+ max_model_len: 8192
45
+ structured_output: null
46
+ type_info:
47
+ module: distilabel.models.llms.vllm
48
+ name: vLLM
49
+ group_generations: false
50
+ add_raw_output: true
51
+ add_raw_input: true
52
+ num_generations: 4
53
+ use_default_structured_output: false
54
+ system_prompt: null
55
+ use_system_prompt: true
56
+ template: "You will be given a problem. Please reason step by step, and put\
57
+ \ your final answer within \boxed{}:\n{{ instruction }}"
58
+ columns:
59
+ - instruction
60
+ runtime_parameters_info:
61
+ - name: resources
62
+ runtime_parameters_info:
63
+ - name: replicas
64
+ optional: true
65
+ description: The number of replicas for the step.
66
+ - name: cpus
67
+ optional: true
68
+ description: The number of CPUs assigned to each step replica.
69
+ - name: gpus
70
+ optional: true
71
+ description: The number of GPUs assigned to each step replica.
72
+ - name: memory
73
+ optional: true
74
+ description: The memory in bytes required for each step replica.
75
+ - name: resources
76
+ optional: true
77
+ description: A dictionary containing names of custom resources and the number
78
+ of those resources required for each step replica.
79
+ - name: input_batch_size
80
+ optional: true
81
+ description: The number of rows that will contain the batches processed by
82
+ the step.
83
+ - name: llm
84
+ runtime_parameters_info:
85
+ - name: cuda_devices
86
+ optional: true
87
+ description: A list with the ID of the CUDA devices to be used.
88
+ - name: disable_cuda_device_placement
89
+ optional: true
90
+ description: Whether to disable the CUDA device placement logic or not.
91
+ - name: generation_kwargs
92
+ description: The kwargs to be propagated to either `generate` or `agenerate`
93
+ methods within each `LLM`.
94
+ keys:
95
+ - name: max_new_tokens
96
+ optional: true
97
+ description: the maximum number of new tokens that the model will generate. Defaults
98
+ to `128`.
99
+ - name: presence_penalty
100
+ optional: true
101
+ description: the presence penalty to use for the generation. Defaults
102
+ to `0.0`.
103
+ - name: frequency_penalty
104
+ optional: true
105
+ description: the repetition penalty to use for the generation. Defaults to
106
+ `0.0`.
107
+ - name: repetition_penalty
108
+ optional: true
109
+ description: the repetition penalty to use for the generation Defaults
110
+ to `1.0`.
111
+ - name: temperature
112
+ optional: true
113
+ description: the temperature to use for the generation. Defaults to `0.1`.
114
+ - name: top_p
115
+ optional: true
116
+ description: the top-p value to use for the generation. Defaults to `1.0`.
117
+ - name: top_k
118
+ optional: true
119
+ description: the top-k value to use for the generation. Defaults to `0`.
120
+ - name: min_p
121
+ optional: true
122
+ description: the minimum probability to use for the generation. Defaults
123
+ to `0.0`.
124
+ - name: logprobs
125
+ optional: true
126
+ description: number of log probabilities to return per output token. If
127
+ `None`, then no log probability won't be returned. Defaults to `None`.
128
+ - name: stop
129
+ optional: true
130
+ description: a list of strings that will be used to stop the generation
131
+ when found. Defaults to `None`.
132
+ - name: stop_token_ids
133
+ optional: true
134
+ description: a list of token ids that will be used to stop the generation when
135
+ found. Defaults to `None`.
136
+ - name: include_stop_str_in_output
137
+ optional: true
138
+ description: whether to include the stop string in the output. Defaults
139
+ to `False`.
140
+ - name: skip_special_tokens
141
+ optional: true
142
+ description: whether to exclude special tokens from the output. Defaults to
143
+ `False`.
144
+ - name: logits_processors
145
+ optional: true
146
+ description: a list of functions to process the logits before sampling. Defaults
147
+ to `None`.
148
+ - name: extra_sampling_params
149
+ optional: true
150
+ description: dictionary with additional arguments to be passed to the
151
+ `SamplingParams` class from `vllm`.
152
+ - name: echo
153
+ optional: true
154
+ description: whether to echo the include the prompt in the response or
155
+ not. Defaults to `False`.
156
+ - name: use_offline_batch_generation
157
+ optional: true
158
+ description: Whether to use the `offline_batch_generate` method to generate
159
+ the responses.
160
+ - name: offline_batch_generation_block_until_done
161
+ optional: true
162
+ description: If provided, then polling will be done until the `ofline_batch_generate`
163
+ method is able to retrieve the results. The value indicate the time to
164
+ wait between each polling.
165
+ - name: extra_kwargs
166
+ optional: true
167
+ description: 'Additional dictionary of keyword arguments that will be passed
168
+ to the `vLLM` class of `vllm` library. See all the supported arguments
169
+ at: https://github.com/vllm-project/vllm/blob/main/vllm/entrypoints/llm.py'
170
+ - name: structured_output
171
+ optional: true
172
+ description: The structured output format to use across all the generations.
173
+ - name: add_raw_output
174
+ optional: true
175
+ description: Whether to include the raw output of the LLM in the key `raw_output_<TASK_NAME>`
176
+ of the `distilabel_metadata` dictionary output column
177
+ - name: add_raw_input
178
+ optional: true
179
+ description: Whether to include the raw input of the LLM in the key `raw_input_<TASK_NAME>`
180
+ of the `distilabel_metadata` dictionary column
181
+ - name: num_generations
182
+ optional: true
183
+ description: The number of generations to be produced per input.
184
+ type_info:
185
+ module: distilabel.steps.tasks.text_generation
186
+ name: TextGeneration
187
+ name: text_generation_0
188
+ - step:
189
+ name: load_data_from_hub_0
190
+ resources:
191
+ replicas: 1
192
+ cpus: null
193
+ gpus: null
194
+ memory: null
195
+ resources: null
196
+ input_mappings: {}
197
+ output_mappings: {}
198
+ use_cache: true
199
+ batch_size: 50
200
+ repo_id: default_name
201
+ split: train
202
+ config: null
203
+ revision: null
204
+ streaming: false
205
+ num_examples: 1300
206
+ storage_options: null
207
+ runtime_parameters_info:
208
+ - name: resources
209
+ runtime_parameters_info:
210
+ - name: replicas
211
+ optional: true
212
+ description: The number of replicas for the step.
213
+ - name: cpus
214
+ optional: true
215
+ description: The number of CPUs assigned to each step replica.
216
+ - name: gpus
217
+ optional: true
218
+ description: The number of GPUs assigned to each step replica.
219
+ - name: memory
220
+ optional: true
221
+ description: The memory in bytes required for each step replica.
222
+ - name: resources
223
+ optional: true
224
+ description: A dictionary containing names of custom resources and the number
225
+ of those resources required for each step replica.
226
+ - name: batch_size
227
+ optional: true
228
+ description: The number of rows that will contain the batches generated by
229
+ the step.
230
+ - name: repo_id
231
+ optional: false
232
+ description: The Hugging Face Hub repository ID of the dataset to load.
233
+ - name: split
234
+ optional: true
235
+ description: The split of the dataset to load. Defaults to 'train'.
236
+ - name: config
237
+ optional: true
238
+ description: The configuration of the dataset to load. This is optional and
239
+ only needed if the dataset has multiple configurations.
240
+ - name: revision
241
+ optional: true
242
+ description: The revision of the dataset to load. Defaults to the latest revision.
243
+ - name: streaming
244
+ optional: true
245
+ description: Whether to load the dataset in streaming mode or not. Defaults
246
+ to False.
247
+ - name: num_examples
248
+ optional: true
249
+ description: The number of examples to load from the dataset. By default will
250
+ load all examples.
251
+ type_info:
252
+ module: distilabel.steps.generators.huggingface
253
+ name: LoadDataFromHub
254
+ name: load_data_from_hub_0
255
+ connections:
256
+ - from: text_generation_0
257
+ to: []
258
+ - from: load_data_from_hub_0
259
+ to:
260
+ - text_generation_0
261
+ routing_batch_functions: []
262
+ type_info:
263
+ module: distilabel.pipeline.local
264
+ name: Pipeline
265
+ requirements: []
53bedcbb53ca1db3fed4f15c1fb88e4d8a6089c8/executions/4a2859cd0d6cd35b1f810c4fd4ad83db1f10fae9/stages.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"current_stage":0,"stages_last_batch":[[]]}
53bedcbb53ca1db3fed4f15c1fb88e4d8a6089c8/executions/6a1f0ecd457e3ba74e92592776c6c93fb7e737bd/batch_manager.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"steps":{"load_data_from_hub_0":"/home/jgw/yhx/jiangu-ri/open-r1/data/DeepSeek-R1-Distill-Qwen-1.5B-GRPO/53bedcbb53ca1db3fed4f15c1fb88e4d8a6089c8/executions/6a1f0ecd457e3ba74e92592776c6c93fb7e737bd/batch_manager_steps/load_data_from_hub_0/batch_manager_step.json","text_generation_0":"/home/jgw/yhx/jiangu-ri/open-r1/data/DeepSeek-R1-Distill-Qwen-1.5B-GRPO/53bedcbb53ca1db3fed4f15c1fb88e4d8a6089c8/executions/6a1f0ecd457e3ba74e92592776c6c93fb7e737bd/batch_manager_steps/text_generation_0/batch_manager_step.json"},"last_batch_received":{"text_generation_0":{"seq_no":0,"step_name":"text_generation_0","last_batch":true,"data_hash":"fec9820608752d5ecabb6f2e921f2fa48740c62d","accumulated":false,"created_from":{"load_data_from_hub_0":[[0,10,10]]},"batch_routed_to":[],"size":40,"type_info":{"module":"distilabel.pipeline.batch","name":"_Batch"}},"load_data_from_hub_0":{"seq_no":0,"step_name":"load_data_from_hub_0","last_batch":true,"data_hash":"1376774b81b43f6e0e269cb3187301b22cff8b7e","accumulated":false,"created_from":{},"batch_routed_to":[],"size":10,"type_info":{"module":"distilabel.pipeline.batch","name":"_Batch"}}},"last_batch_sent":{"text_generation_0":{"seq_no":0,"step_name":"text_generation_0","last_batch":true,"data_hash":null,"accumulated":false,"created_from":{"load_data_from_hub_0":[[0,10,10]]},"batch_routed_to":[],"size":0,"type_info":{"module":"distilabel.pipeline.batch","name":"_Batch"}},"load_data_from_hub_0":{"seq_no":0,"step_name":"load_data_from_hub_0","last_batch":false,"data_hash":null,"accumulated":false,"created_from":{},"batch_routed_to":[],"size":0,"type_info":{"module":"distilabel.pipeline.batch","name":"_Batch"}}},"last_batch_flag_sent_to":[],"received_batch_seq_nos":{"text_generation_0":[0],"load_data_from_hub_0":[0]},"type_info":{"module":"distilabel.pipeline.batch_manager","name":"_BatchManager"}}
53bedcbb53ca1db3fed4f15c1fb88e4d8a6089c8/executions/6a1f0ecd457e3ba74e92592776c6c93fb7e737bd/batch_manager_steps/load_data_from_hub_0/batch_manager_step.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"step_name":"load_data_from_hub_0","accumulate":false,"input_batch_size":null,"data":{},"built_batches":[],"seq_no":0,"last_batch_received":[],"convergence_step":true,"convergence_step_batches_consumed":{},"next_expected_created_from_batch_seq_no":0,"next_expected_seq_no":{},"step_signature":"88f19108c6d8fd87f113fbd9b85f4cc1add05dbc","use_cache":true,"step_offset":{},"type_info":{"module":"distilabel.pipeline.batch_manager","name":"_BatchManagerStep"}}
53bedcbb53ca1db3fed4f15c1fb88e4d8a6089c8/executions/6a1f0ecd457e3ba74e92592776c6c93fb7e737bd/batch_manager_steps/text_generation_0/batch_manager_step.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"step_name":"text_generation_0","accumulate":false,"input_batch_size":50,"data":{"load_data_from_hub_0":[]},"built_batches":[],"seq_no":1,"last_batch_received":["load_data_from_hub_0"],"convergence_step":false,"convergence_step_batches_consumed":{},"next_expected_created_from_batch_seq_no":0,"next_expected_seq_no":{"load_data_from_hub_0":[1,1]},"step_signature":"0873d6dc3a6f7216415c198a6cfb1587f29dc1ff","use_cache":true,"step_offset":{"load_data_from_hub_0":[0,10]},"type_info":{"module":"distilabel.pipeline.batch_manager","name":"_BatchManagerStep"}}
53bedcbb53ca1db3fed4f15c1fb88e4d8a6089c8/executions/6a1f0ecd457e3ba74e92592776c6c93fb7e737bd/data/steps_outputs/text_generation_0/00001.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73820b5bc3b4c3ebd21f847f1afb3eb33ebc858005b72b294054a901c298ef27
3
+ size 99655
53bedcbb53ca1db3fed4f15c1fb88e4d8a6089c8/executions/6a1f0ecd457e3ba74e92592776c6c93fb7e737bd/pipeline.log ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [2025-02-18 19:40:09] INFO 📝 Pipeline data will be written to '/home/jgw/yhx/jiangu-ri/open-r1/data/DeepSeek-R1-Distill-Qwen-1.5B-GRPO/53bedcbb53ca1db3fed4f15c1fb88e4d8a6089c8/executions/6a1f0ecd457e3ba74e92592776c6c93fb7e737bd/data/steps_outputs'
2
+ [2025-02-18 19:40:09] INFO ⌛ The steps of the pipeline will be loaded in stages:
3
+ * Legend: 🚰 GeneratorStep 🌐 GlobalStep 🔄 Step
4
+ * Stage 0:
5
+ - 🚰 'load_data_from_hub_0'
6
+ - 🔄 'text_generation_0'
7
+ [2025-02-18 19:40:09] INFO ⏳ Waiting for all the steps of stage 0 to load...
8
+ [2025-02-18 19:40:09] INFO 🎮 LLM 'text_generation_0-replica-0' is going to use the following CUDA devices: [0].
9
+ [2025-02-18 19:40:12] INFO ⏳ Steps from stage 0 loaded: 1/2
10
+ * 'text_generation_0' replicas: 0/1
11
+ * 'load_data_from_hub_0' replicas: 1/1
12
+ [2025-02-18 19:40:59] INFO ⏳ Steps from stage 0 loaded: 2/2
13
+ * 'text_generation_0' replicas: 1/1
14
+ * 'load_data_from_hub_0' replicas: 1/1
15
+ [2025-02-18 19:40:59] INFO ✅ All the steps from stage 0 have been loaded!
16
+ [2025-02-18 19:40:59] INFO 🚰 Starting yielding batches from generator step 'load_data_from_hub_0'. Offset: 0
17
+ [2025-02-18 19:40:59] INFO 📨 Step 'load_data_from_hub_0' sending batch 0 to output queue
18
+ [2025-02-18 19:40:59] INFO 🏁 Finished running step 'load_data_from_hub_0' (replica ID: 0)
19
+ [2025-02-18 19:40:59] INFO 📦 Processing batch 0 in 'text_generation_0' (replica ID: 0)
20
+ [2025-02-18 19:41:55] INFO 📨 Step 'text_generation_0' sending batch 0 to output queue
21
+ [2025-02-18 19:41:55] INFO 🏁 Finished running step 'text_generation_0' (replica ID: 0)
22
+ [2025-02-18 19:44:10] INFO 💾 Loading `_BatchManager` from cache: '/home/jgw/yhx/jiangu-ri/open-r1/data/DeepSeek-R1-Distill-Qwen-1.5B-GRPO/53bedcbb53ca1db3fed4f15c1fb88e4d8a6089c8/executions/6a1f0ecd457e3ba74e92592776c6c93fb7e737bd/batch_manager.json'
23
+ [2025-02-18 19:44:10] INFO 💾 Loaded batch manager from cache doesn't contain any remaining data. Returning `Distiset` from cache data...
24
+ [2025-02-18 19:46:13] INFO 💾 Loading `_BatchManager` from cache: '/home/jgw/yhx/jiangu-ri/open-r1/data/DeepSeek-R1-Distill-Qwen-1.5B-GRPO/53bedcbb53ca1db3fed4f15c1fb88e4d8a6089c8/executions/6a1f0ecd457e3ba74e92592776c6c93fb7e737bd/batch_manager.json'
25
+ [2025-02-18 19:46:13] INFO 💾 Loaded batch manager from cache doesn't contain any remaining data. Returning `Distiset` from cache data...
26
+ [2025-02-18 19:48:28] INFO 💾 Loading `_BatchManager` from cache: '/home/jgw/yhx/jiangu-ri/open-r1/data/DeepSeek-R1-Distill-Qwen-1.5B-GRPO/53bedcbb53ca1db3fed4f15c1fb88e4d8a6089c8/executions/6a1f0ecd457e3ba74e92592776c6c93fb7e737bd/batch_manager.json'
27
+ [2025-02-18 19:48:28] INFO 💾 Loaded batch manager from cache doesn't contain any remaining data. Returning `Distiset` from cache data...
28
+ [2025-02-18 19:57:16] INFO 💾 Loading `_BatchManager` from cache: '/home/jgw/yhx/jiangu-ri/open-r1/data/DeepSeek-R1-Distill-Qwen-1.5B-GRPO/53bedcbb53ca1db3fed4f15c1fb88e4d8a6089c8/executions/6a1f0ecd457e3ba74e92592776c6c93fb7e737bd/batch_manager.json'
29
+ [2025-02-18 19:57:16] INFO 💾 Loaded batch manager from cache doesn't contain any remaining data. Returning `Distiset` from cache data...
30
+ [2025-02-18 19:59:09] INFO 💾 Loading `_BatchManager` from cache: '/home/jgw/yhx/jiangu-ri/open-r1/data/DeepSeek-R1-Distill-Qwen-1.5B-GRPO/53bedcbb53ca1db3fed4f15c1fb88e4d8a6089c8/executions/6a1f0ecd457e3ba74e92592776c6c93fb7e737bd/batch_manager.json'
31
+ [2025-02-18 19:59:09] INFO 💾 Loaded batch manager from cache doesn't contain any remaining data. Returning `Distiset` from cache data...
53bedcbb53ca1db3fed4f15c1fb88e4d8a6089c8/executions/6a1f0ecd457e3ba74e92592776c6c93fb7e737bd/pipeline.yaml ADDED
@@ -0,0 +1,265 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ distilabel:
2
+ version: 1.5.3
3
+ pipeline:
4
+ name: /home/jgw/yhx/jiangu-ri/open-r1/data/DeepSeek-R1-Distill-Qwen-1.5B-GRPO
5
+ description: A pipeline to generate data from a distilled r1 model
6
+ steps:
7
+ - step:
8
+ name: text_generation_0
9
+ resources:
10
+ replicas: 1
11
+ cpus: null
12
+ gpus: null
13
+ memory: null
14
+ resources: null
15
+ input_mappings:
16
+ instruction: problem
17
+ output_mappings: {}
18
+ use_cache: true
19
+ input_batch_size: 50
20
+ llm:
21
+ cuda_devices: auto
22
+ disable_cuda_device_placement: false
23
+ use_magpie_template: false
24
+ magpie_pre_query_template: null
25
+ generation_kwargs:
26
+ temperature: 0.6
27
+ max_new_tokens: 8192
28
+ use_offline_batch_generation: false
29
+ offline_batch_generation_block_until_done: null
30
+ jobs_ids: null
31
+ model: /home/jgw/yhx/jiangu-ri/open-r1/data/DeepSeek-R1-Distill-Qwen-1.5B-GRPO
32
+ dtype: auto
33
+ trust_remote_code: false
34
+ quantization: null
35
+ revision: null
36
+ tokenizer: /home/jgw/yhx/jiangu-ri/open-r1/data/DeepSeek-R1-Distill-Qwen-1.5B-GRPO
37
+ tokenizer_mode: auto
38
+ tokenizer_revision: null
39
+ skip_tokenizer_init: false
40
+ chat_template: null
41
+ seed: 0
42
+ extra_kwargs:
43
+ tensor_parallel_size: 1
44
+ max_model_len: 8192
45
+ structured_output: null
46
+ type_info:
47
+ module: distilabel.models.llms.vllm
48
+ name: vLLM
49
+ group_generations: false
50
+ add_raw_output: true
51
+ add_raw_input: true
52
+ num_generations: 4
53
+ use_default_structured_output: false
54
+ system_prompt: null
55
+ use_system_prompt: true
56
+ template: "You will be given a problem. Please reason step by step, and put\
57
+ \ your final answer within \boxed{}:\n{{ instruction }}"
58
+ columns:
59
+ - instruction
60
+ runtime_parameters_info:
61
+ - name: resources
62
+ runtime_parameters_info:
63
+ - name: replicas
64
+ optional: true
65
+ description: The number of replicas for the step.
66
+ - name: cpus
67
+ optional: true
68
+ description: The number of CPUs assigned to each step replica.
69
+ - name: gpus
70
+ optional: true
71
+ description: The number of GPUs assigned to each step replica.
72
+ - name: memory
73
+ optional: true
74
+ description: The memory in bytes required for each step replica.
75
+ - name: resources
76
+ optional: true
77
+ description: A dictionary containing names of custom resources and the number
78
+ of those resources required for each step replica.
79
+ - name: input_batch_size
80
+ optional: true
81
+ description: The number of rows that will contain the batches processed by
82
+ the step.
83
+ - name: llm
84
+ runtime_parameters_info:
85
+ - name: cuda_devices
86
+ optional: true
87
+ description: A list with the ID of the CUDA devices to be used.
88
+ - name: disable_cuda_device_placement
89
+ optional: true
90
+ description: Whether to disable the CUDA device placement logic or not.
91
+ - name: generation_kwargs
92
+ description: The kwargs to be propagated to either `generate` or `agenerate`
93
+ methods within each `LLM`.
94
+ keys:
95
+ - name: max_new_tokens
96
+ optional: true
97
+ description: the maximum number of new tokens that the model will generate. Defaults
98
+ to `128`.
99
+ - name: presence_penalty
100
+ optional: true
101
+ description: the presence penalty to use for the generation. Defaults
102
+ to `0.0`.
103
+ - name: frequency_penalty
104
+ optional: true
105
+ description: the frequency penalty to use for the generation. Defaults to
106
+ `0.0`.
107
+ - name: repetition_penalty
108
+ optional: true
109
+ description: the repetition penalty to use for the generation. Defaults
110
+ to `1.0`.
111
+ - name: temperature
112
+ optional: true
113
+ description: the temperature to use for the generation. Defaults to `0.1`.
114
+ - name: top_p
115
+ optional: true
116
+ description: the top-p value to use for the generation. Defaults to `1.0`.
117
+ - name: top_k
118
+ optional: true
119
+ description: the top-k value to use for the generation. Defaults to `0`.
120
+ - name: min_p
121
+ optional: true
122
+ description: the minimum probability to use for the generation. Defaults
123
+ to `0.0`.
124
+ - name: logprobs
125
+ optional: true
126
+ description: number of log probabilities to return per output token. If
127
+ `None`, then no log probabilities will be returned. Defaults to `None`.
128
+ - name: stop
129
+ optional: true
130
+ description: a list of strings that will be used to stop the generation
131
+ when found. Defaults to `None`.
132
+ - name: stop_token_ids
133
+ optional: true
134
+ description: a list of token ids that will be used to stop the generation when
135
+ found. Defaults to `None`.
136
+ - name: include_stop_str_in_output
137
+ optional: true
138
+ description: whether to include the stop string in the output. Defaults
139
+ to `False`.
140
+ - name: skip_special_tokens
141
+ optional: true
142
+ description: whether to exclude special tokens from the output. Defaults to
143
+ `False`.
144
+ - name: logits_processors
145
+ optional: true
146
+ description: a list of functions to process the logits before sampling. Defaults
147
+ to `None`.
148
+ - name: extra_sampling_params
149
+ optional: true
150
+ description: dictionary with additional arguments to be passed to the
151
+ `SamplingParams` class from `vllm`.
152
+ - name: echo
153
+ optional: true
154
+ description: whether to echo (i.e. include) the prompt in the response or
155
+ not. Defaults to `False`.
156
+ - name: use_offline_batch_generation
157
+ optional: true
158
+ description: Whether to use the `offline_batch_generate` method to generate
159
+ the responses.
160
+ - name: offline_batch_generation_block_until_done
161
+ optional: true
162
+ description: If provided, then polling will be done until the `offline_batch_generate`
163
+ method is able to retrieve the results. The value indicates the time to
164
+ wait between each polling.
165
+ - name: extra_kwargs
166
+ optional: true
167
+ description: 'Additional dictionary of keyword arguments that will be passed
168
+ to the `LLM` class of the `vllm` library. See all the supported arguments
169
+ at: https://github.com/vllm-project/vllm/blob/main/vllm/entrypoints/llm.py'
170
+ - name: structured_output
171
+ optional: true
172
+ description: The structured output format to use across all the generations.
173
+ - name: add_raw_output
174
+ optional: true
175
+ description: Whether to include the raw output of the LLM in the key `raw_output_<TASK_NAME>`
176
+ of the `distilabel_metadata` dictionary output column
177
+ - name: add_raw_input
178
+ optional: true
179
+ description: Whether to include the raw input of the LLM in the key `raw_input_<TASK_NAME>`
180
+ of the `distilabel_metadata` dictionary column
181
+ - name: num_generations
182
+ optional: true
183
+ description: The number of generations to be produced per input.
184
+ type_info:
185
+ module: distilabel.steps.tasks.text_generation
186
+ name: TextGeneration
187
+ name: text_generation_0
188
+ - step:
189
+ name: load_data_from_hub_0
190
+ resources:
191
+ replicas: 1
192
+ cpus: null
193
+ gpus: null
194
+ memory: null
195
+ resources: null
196
+ input_mappings: {}
197
+ output_mappings: {}
198
+ use_cache: true
199
+ batch_size: 50
200
+ repo_id: default_name
201
+ split: train
202
+ config: null
203
+ revision: null
204
+ streaming: false
205
+ num_examples: 10
206
+ storage_options: null
207
+ runtime_parameters_info:
208
+ - name: resources
209
+ runtime_parameters_info:
210
+ - name: replicas
211
+ optional: true
212
+ description: The number of replicas for the step.
213
+ - name: cpus
214
+ optional: true
215
+ description: The number of CPUs assigned to each step replica.
216
+ - name: gpus
217
+ optional: true
218
+ description: The number of GPUs assigned to each step replica.
219
+ - name: memory
220
+ optional: true
221
+ description: The memory in bytes required for each step replica.
222
+ - name: resources
223
+ optional: true
224
+ description: A dictionary containing names of custom resources and the number
225
+ of those resources required for each step replica.
226
+ - name: batch_size
227
+ optional: true
228
+ description: The number of rows that will contain the batches generated by
229
+ the step.
230
+ - name: repo_id
231
+ optional: false
232
+ description: The Hugging Face Hub repository ID of the dataset to load.
233
+ - name: split
234
+ optional: true
235
+ description: The split of the dataset to load. Defaults to 'train'.
236
+ - name: config
237
+ optional: true
238
+ description: The configuration of the dataset to load. This is optional and
239
+ only needed if the dataset has multiple configurations.
240
+ - name: revision
241
+ optional: true
242
+ description: The revision of the dataset to load. Defaults to the latest revision.
243
+ - name: streaming
244
+ optional: true
245
+ description: Whether to load the dataset in streaming mode or not. Defaults
246
+ to False.
247
+ - name: num_examples
248
+ optional: true
249
+ description: The number of examples to load from the dataset. By default will
250
+ load all examples.
251
+ type_info:
252
+ module: distilabel.steps.generators.huggingface
253
+ name: LoadDataFromHub
254
+ name: load_data_from_hub_0
255
+ connections:
256
+ - from: text_generation_0
257
+ to: []
258
+ - from: load_data_from_hub_0
259
+ to:
260
+ - text_generation_0
261
+ routing_batch_functions: []
262
+ type_info:
263
+ module: distilabel.pipeline.local
264
+ name: Pipeline
265
+ requirements: []
53bedcbb53ca1db3fed4f15c1fb88e4d8a6089c8/executions/6a1f0ecd457e3ba74e92592776c6c93fb7e737bd/stages.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"current_stage":0,"stages_last_batch":[["text_generation_0"]]}
README.md ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
3
+ library_name: transformers
4
+ model_name: DeepSeek-R1-Distill-Qwen-1.5B-GRPO
5
+ tags:
6
+ - generated_from_trainer
7
+ - trl
8
+ - grpo
9
+ licence: license
10
+ ---
11
+
12
+ # Model Card for DeepSeek-R1-Distill-Qwen-1.5B-GRPO
13
+
14
+ This model is a fine-tuned version of [deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B).
15
+ It has been trained using [TRL](https://github.com/huggingface/trl).
16
+
17
+ ## Quick start
18
+
19
+ ```python
20
+ from transformers import pipeline
21
+
22
+ question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
23
+ generator = pipeline("text-generation", model="QYWH/DeepSeek-R1-Distill-Qwen-1.5B-GRPO", device="cuda")
24
+ output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
25
+ print(output["generated_text"])
26
+ ```
27
+
28
+ ## Training procedure
29
+
30
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/2495479412-/huggingface/runs/ll8pde88)
31
+
32
+
33
+ This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
34
+
35
+ ### Framework versions
36
+
37
+ - TRL: 0.16.0.dev0
38
+ - Transformers: 4.49.0.dev0
39
+ - Pytorch: 2.5.1
40
+ - Datasets: 3.3.0
41
+ - Tokenizers: 0.21.0
42
+
43
+ ## Citations
44
+
45
+ Cite GRPO as:
46
+
47
+ ```bibtex
48
+ @article{zhihong2024deepseekmath,
49
+ title = {{DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models}},
50
+ author = {Zhihong Shao and Peiyi Wang and Qihao Zhu and Runxin Xu and Junxiao Song and Mingchuan Zhang and Y. K. Li and Y. Wu and Daya Guo},
51
+ year = 2024,
52
+ eprint = {arXiv:2402.03300},
53
+ }
54
+
55
+ ```
56
+
57
+ Cite TRL as:
58
+
59
+ ```bibtex
60
+ @misc{vonwerra2022trl,
61
+ title = {{TRL: Transformer Reinforcement Learning}},
62
+ author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallouédec},
63
+ year = 2020,
64
+ journal = {GitHub repository},
65
+ publisher = {GitHub},
66
+ howpublished = {\url{https://github.com/huggingface/trl}}
67
+ }
68
+ ```
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "total_flos": 0.0,
3
+ "train_loss": 0.036479350634126606,
4
+ "train_runtime": 34004.8654,
5
+ "train_samples": 1300,
6
+ "train_samples_per_second": 1.529,
7
+ "train_steps_per_second": 0.006
8
+ }
config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
3
+ "architectures": [
4
+ "Qwen2ForCausalLM"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "eos_token_id": 151643,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 1536,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 8960,
13
+ "max_position_embeddings": 131072,
14
+ "max_window_layers": 21,
15
+ "model_type": "qwen2",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 28,
18
+ "num_key_value_heads": 2,
19
+ "rms_norm_eps": 1e-06,
20
+ "rope_scaling": null,
21
+ "rope_theta": 10000,
22
+ "sliding_window": 4096,
23
+ "tie_word_embeddings": false,
24
+ "torch_dtype": "bfloat16",
25
+ "transformers_version": "4.49.0.dev0",
26
+ "use_cache": false,
27
+ "use_mrope": false,
28
+ "use_sliding_window": false,
29
+ "vocab_size": 151936
30
+ }
generation_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 151646,
4
+ "do_sample": true,
5
+ "eos_token_id": 151643,
6
+ "temperature": 0.6,
7
+ "top_p": 0.95,
8
+ "transformers_version": "4.49.0.dev0"
9
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e51f376edd035aeedab94a3ff434d2f503da5f1e58c828fe0ffe97e3728bb6f
3
+ size 3554214752
special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|begin▁of▁sentence|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|end▁of▁sentence|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<|end▁of▁sentence|>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
+ }
steps_data/load_data_from_hub_0_04a0a6d2a56ec3ebeea8e75fb431e9f80fdc7ea6/batch_0.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seq_no":0,"step_name":"load_data_from_hub_0","last_batch":false,"data_hash":"9cfccc8825b8b677766ef3cd02fe7a634b068ead","accumulated":false,"created_from":{},"batch_routed_to":[],"size":50,"data":[[{"problem":"请将下面的文言文转为白话文:贞祝(兄)于祖辛?","solution":"贞问行祝祷祭于先壬祖辛么?","messages":[{"content":"请将下面的文言文转为白话文:贞祝(兄)于祖辛?","role":"user"},{"content":"贞问行祝祷祭于先壬祖辛么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:其七朋。","solution":"七朋贝么?","messages":[{"content":"请将下面的文言文转为白话文:其七朋。","role":"user"},{"content":"七朋贝么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:甲辰卜,㱿,贞下乙宾于【咸】。","solution":"甲辰日占卜,贞人㱿问卦,贞问先王祖乙(即下乙)傧配于先王大乙(即咸、唐等为其又名)么?","messages":[{"content":"请将下面的文言文转为白话文:甲辰卜,㱿,贞下乙宾于【咸】。","role":"user"},{"content":"甲辰日占卜,贞人㱿问卦,贞问先王祖乙(即下乙)傧配于先王大乙(即咸、唐等为其又名)么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:贞泳。","solution":"贞问还是命令贵族泳?","messages":[{"content":"请将下面的文言文转为白话文:贞泳。","role":"user"},{"content":"贞问还是命令贵族泳?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:戊辰卜,侑𠬝妣己一女,妣庚一女。","solution":"戊辰日占卜,行侑求之祭以𠬝奴为献,祭先妣名妣己者一名女子、妣庚以一名女子为献。","messages":[{"content":"请将下面的文言文转为白话文:戊辰卜,侑𠬝妣己一女,妣庚一女。","role":"user"},{"content":"戊辰日占卜,行侑求之祭以𠬝奴为献,祭先妣名妣己者一名女子、妣庚以一名女子为献。","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:癸亥【卜𬻿,禘南。","solution":"癸亥日卜问,禘祭南方之神么?","messages":[{"content":"请将下面的文言文转为白话文:癸亥【卜𬻿,禘南。","role":"user"},{"content":"癸亥日卜问,禘祭南方之神么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:贞勿呼𰅵先御燎。四","solution":"贞问还是不命令贵族𰅵先往举行御除灾害之祭与烧燎之祭?","messages":[{"content":"请将下面的文言文转为白话文:贞勿呼𰅵先御燎。四","role":"user"},{"content":"贞问还是不命令贵族𰅵先往举行御除灾害之祭与烧燎之祭?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:甲方块【卜,贞·亡·","solution":"因辞残,贞问之事不可得知。","messages":[{"content":"请将下面的文言文转为白话文:甲方块【卜,贞·亡·","role":"user"},{"content":"因辞残,贞问之事不可得知。","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:驭釐。","solution":"来福吉利?","messages":[{"content":"请将下面的文言文转为白话文:驭釐。","role":"user"},{"content":"来福吉利?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:贞祀有若。","solution":"贞问行祀祭会有顺若吉利吧?","messages":[{"content":"请将下面的文言文转为白话文:贞祀有若。","role
":"user"},{"content":"贞问行祀祭会有顺若吉利吧?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:贞帝弗终兹邑。","solution":"贞问上帝不对这个城邑终穷作灾吧?","messages":[{"content":"请将下面的文言文转为白话文:贞帝弗终兹邑。","role":"user"},{"content":"贞问上帝不对这个城邑终穷作灾吧?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:𣥅。","solution":"有祟害?","messages":[{"content":"请将下面的文言文转为白话文:𣥅。","role":"user"},{"content":"有祟害?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:丙申卜,行,贞父丁岁物。在五月。","solution":"丙申日占卜,贞人行问卦,贞问行刿杀杂色牛于父丁(即时王之父武丁)么?这是在五月占卜的。","messages":[{"content":"请将下面的文言文转为白话文:丙申卜,行,贞父丁岁物。在五月。","role":"user"},{"content":"丙申日占卜,贞人行问卦,贞问行刿杀杂色牛于父丁(即时王之父武丁)么?这是在五月占卜的。","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:侑于羌甲。","solution":"行侑求之祭于先王羌甲么?","messages":[{"content":"请将下面的文言文转为白话文:侑于羌甲。","role":"user"},{"content":"行侑求之祭于先王羌甲么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:以多田亞任。","solution":"致进多甸(即田)官、亞官和任(同男爵)官么?","messages":[{"content":"请将下面的文言文转为白话文:以多田亞任。","role":"user"},{"content":"致进多甸(即田)官、亞官和任(同男爵)官么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:二月在蜀。","solution":"二月在蜀地么?","messages":[{"content":"请将下面的文言文转为白话文:二月在蜀。","role":"user"},{"content":"二月在蜀地么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:𤉲于夒,𫳅。十月。","solution":"行烧燎之祭于高祖夒,用一对羊么?这是十月占卜的。","messages":[{"content":"请将下面的文言文转为白话文:𤉲于夒,𫳅。十月。","role":"user"},{"content":"行烧燎之祭于高祖夒,用一对羊么?这是十月占卜的。","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:其雉众。吉","solution":"伤损了众人么?这一卜是吉利的。","messages":[{"content":"请将下面的文言文转为白话文:其雉众。吉","role":"user"},{"content":"伤损了众人么?这一卜是吉利的。","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:癸酉卜,行,贞王父丁岁一牛眔兄己一牛,兄庚··亡尤。","solution":"癸酉日占卜,贞人行问卦,贞问商王刿杀一头牛祭父丁(即时王祖甲之父武丁),及兄己(时王之兄孝己)宀头牛献祭,兄庚(即时王之兄祖庚)··没有灾忧之事发生么?","messages":[{"content":"请将下面的文言文转为白话文:癸酉卜,行,贞王父丁岁一牛眔兄己一牛,兄庚··亡尤。","role":"user"},{"content":"癸酉日占卜,贞人行问卦,贞问商王刿杀一头牛祭父丁(即时王祖甲之父武丁),及兄己(时王之兄孝己)宀头牛献祭,兄庚(即时王之兄祖庚)··没有灾忧之事发生么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:贞于大甲。","solution":"贞问于先王大甲庙(举行祭祀)么?","messages":[{"content":"请将下面的文言文转为白话文:贞于大甲。","role":"user"},{"content":"贞问于先王大甲庙(
举行祭祀)么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:叀··用,王【受】佑。","solution":"叀诵咏,商王受到保佑么?","messages":[{"content":"请将下面的文言文转为白话文:叀··用,王【受】佑。","role":"user"},{"content":"叀诵咏,商王受到保佑么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:方块方块卜,行【贞】王宾··亡尤。","solution":"某日占卜,贞人行问卦,贞问商王傧于·没有灾忧之事发生吧?","messages":[{"content":"请将下面的文言文转为白话文:方块方块卜,行【贞】王宾··亡尤。","role":"user"},{"content":"某日占卜,贞人行问卦,贞问商王傧于·没有灾忧之事发生吧?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:癸亥卜争,贞旬亡祸。·【来】艰。五日丁卯王狩··亦仄在","solution":"【车】··癸𠖮日占卜,贞人争问卦,贞问下一个十天一旬之内没有灾祸之事发生吧?·(𰉏辞残,应为商王判断说:将有祟)艰之事发生。事后所记的应验结果是占卜后的第五日丁卯,商王去田狩打猎···(出现了不好的情况,某人)也歪在了车上··。","messages":[{"content":"请将下面的文言文转为白话文:癸亥卜争,贞旬亡祸。·【来】艰。五日丁卯王狩··亦仄在","role":"user"},{"content":"【车】··癸𠖮日占卜,贞人争问卦,贞问下一个十天一旬之内没有灾祸之事发生吧?·(𰉏辞残,应为商王判断说:将有祟)艰之事发生。事后所记的应验结果是占卜后的第五日丁卯,商王去田狩打猎···(出现了不好的情况,某人)也歪在了车上··。","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:庚子··侑于·","solution":"庚子日占卜,行侑求之祭于···么?","messages":[{"content":"请将下面的文言文转为白话文:庚子··侑于·","role":"user"},{"content":"庚子日占卜,行侑求之祭于···么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:贞不其受【年】。","solution":"贞问不会得到好年成么?","messages":[{"content":"请将下面的文言文转为白话文:贞不其受【年】。","role":"user"},{"content":"贞问不会得到好年成么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:癸丑卜,争,贞自今至于丁巳我𢦏宙。","solution":"王𰉏日:丁巳我不其𢦏,来甲子𢦏。旬又一日癸亥车弗𢦏。之夕垔,甲子允𢦏。二三四五癸丑日占卜,贞人争问卦,贞问自今(占卜之日)至丁巳日商王(我)要征伐宙方国么?商王看了卜兆以后,亲自判断说:丁巳日商王(我)不征伐宙,于未来的甲子日才去征伐。事后所记应验的结果是十一天之后���癸亥日,商壬之车没有灾伤(卜辞车多为战车意。此处之车,据《合集》11442“车不其氏十朋”看,似应为人名或地名。若此,则应是车地的贵族名车者没有灾伤)。当天夜里天气变坏,于甲子日果然灾伤了(宙)。","messages":[{"content":"请将下面的文言文转为白话文:癸丑卜,争,贞自今至于丁巳我𢦏宙。","role":"user"},{"content":"王𰉏日:丁巳我不其𢦏,来甲子𢦏。旬又一日癸亥车弗𢦏。之夕垔,甲子允𢦏。二三四五癸丑日占卜,贞人争问卦,贞问自今(占卜之日)至丁巳日商王(我)要征伐宙方国么?商王看了卜兆以后,亲自判断说:丁巳日商王(我)不征伐宙,于未来的甲子日才去征伐。事后所记应验的结果是十一天之后的癸亥日,商壬之车没有灾伤(卜辞车多为战车意。此处之车,据《合集》11442“车不其氏十朋”看,似应为人名或地名。若此,则应是车地的贵族名车者没有灾伤)。当天夜里天气变坏,于甲子日果然灾伤了(宙)。","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:叀竝令省廪。","solution":"是命令贵族名竝者去省视仓廪么?","messages":[{"content":"请将下面的文言文转为白话文:叀竝令省廪。","role":"user"},{"content":"是命令贵族名竝者去省视仓廪么?","role":"assist
ant"}]},{"problem":"请将下面的文言文转为白话文:壬午卜,扶,酒阳甲。","solution":"壬午日占卜,贞人扶问卦,酒祭先王阳甲么?","messages":[{"content":"请将下面的文言文转为白话文:壬午卜,扶,酒阳甲。","role":"user"},{"content":"壬午日占卜,贞人扶问卦,酒祭先王阳甲么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:河𤉲十。","solution":"对先祖神河烧燎之祭十(名、头)为献么?","messages":[{"content":"请将下面的文言文转为白话文:河𤉲十。","role":"user"},{"content":"对先祖神河烧燎之祭十(名、头)为献么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:乙丑卜·","solution":"乙丑日卜问··","messages":[{"content":"请将下面的文言文转为白话文:乙丑卜·","role":"user"},{"content":"乙丑日卜问··","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:癸亥卜,㱿,贞我史𢦏缶。","solution":"癸亥日占卜,贞人㱿问卦,贞问我的史(史为武官)𢦏戕缶(基方)么?","messages":[{"content":"请将下面的文言文转为白话文:癸亥卜,㱿,贞我史𢦏缶。","role":"user"},{"content":"癸亥日占卜,贞人㱿问卦,贞问我的史(史为武官)𢦏戕缶(基方)么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:甲子卜··令众·田,若。","solution":"甲子日占问···令众由民··农田,顺若么?","messages":[{"content":"请将下面的文言文转为白话文:甲子卜··令众·田,若。","role":"user"},{"content":"甲子日占问···令众由民··农田,顺若么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:甲子卜,内,翌乙丑【启】。乙丑···","solution":"甲子日占卜,贞人内问卦,未来的乙丑日(天气晴启)么?乙丑日··","messages":[{"content":"请将下面的文言文转为白话文:甲子卜,内,翌乙丑【启】。乙丑···","role":"user"},{"content":"甲子日占卜,贞人内问卦,未来的乙丑日(天气晴启)么?乙丑日··","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:贞勿令舟。","solution":"贞问不命令方国贵族名舟者么?","messages":[{"content":"请将下面的文言文转为白话文:贞勿令舟。","role":"user"},{"content":"贞问不命令方国贵族名舟者么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:三十牢又羌。","solution":"还是三十对牛和羌奴为献?","messages":[{"content":"请将下面的文言文转为白话文:三十牢又羌。","role":"user"},{"content":"还是三十对牛和羌奴为献?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:丁亥,贞王其汏方𠬝,呼御事。","solution":"丁亥日卜问,商王之(子辈名)汏行方祭用𠬝奴,命令他行事么?","messages":[{"content":"请将下面的文言文转为白话文:丁亥,贞王其汏方𠬝,呼御事。","role":"user"},{"content":"丁亥日卜问,商王之(子辈名)汏行方祭用𠬝奴,命令他行事么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:壬申王勿·不其𰅱。壬申狩𰅱。","solution":"壬申日商壬不·不会有所擒获。壬申日去狩猎,能有擒获。","messages":[{"content":"请将下面的文言文转为白话文:壬申王勿·不其𰅱。壬申狩𰅱。","role":"user"},{"content":"壬申日商壬不·不会有所擒获。壬申日去狩猎,能有擒获。","role":"assistant"}]},{"problem":"请将下面的文言文转��白话文:癸巳卜,令𰅵省廪。","sol
ution":"癸巳日占问,命令贵族𰅵去省视仓廪么?","messages":[{"content":"请将下面的文言文转为白话文:癸巳卜,令𰅵省廪。","role":"user"},{"content":"癸巳日占问,命令贵族𰅵去省视仓廪么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:【】㱿,贞犬延亡𰶤。","solution":"某日占卜,贞人㱿问卦,贞问犬官名延者没有母豕?","messages":[{"content":"请将下面的文言文转为白话文:【】㱿,贞犬延亡𰶤。","role":"user"},{"content":"某日占卜,贞人㱿问卦,贞问犬官名延者没有母豕?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:贞咸弗佐王。","solution":"贞问还是先王大乙不佐助商王呢?","messages":[{"content":"请将下面的文言文转为白话文:贞咸弗佐王。","role":"user"},{"content":"贞问还是先王大乙不佐助商王呢?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:五牛。","solution":"还是五头牛?","messages":[{"content":"请将下面的文言文转为白话文:五牛。","role":"user"},{"content":"还是五头牛?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:贞王飨。","solution":"贞问商王飨宴么?","messages":[{"content":"请将下面的文言文转为白话文:贞王飨。","role":"user"},{"content":"贞问商王飨宴么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:壬辰【卜】,贞王【其】田,亡灾。","solution":"壬辰日占卜,贞问商王去田猎,没有灾祸之事发生吧?","messages":[{"content":"请将下面的文言文转为白话文:壬辰【卜】,贞王【其】田,亡灾。","role":"user"},{"content":"壬辰日占卜,贞问商王去田猎,没有灾祸之事发生吧?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:中日至昃不雨。","solution":"正当中午的时候至太阳偏西的时候,不下雨么?","messages":[{"content":"请将下面的文言文转为白话文:中日至昃不雨。","role":"user"},{"content":"正当中午的时候至太阳偏西的时候,不下雨么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:其𬂤。","solution":"··进行𬂤焚么?","messages":[{"content":"请将下面的文言文转为白话文:其𬂤。","role":"user"},{"content":"··进行𬂤焚么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:戊戌···缶自","solution":"戊戌日··贵族名缶者自么?","messages":[{"content":"请将下面的文言文转为白话文:戊戌···缶自","role":"user"},{"content":"戊戌日··贵族名缶者自么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:贞不其易日。","solution":"贞问还是天气不阴蔽呢?","messages":[{"content":"请将下面的文言文转为白话文:贞不其易日。","role":"user"},{"content":"贞问还是天气不阴蔽呢?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:辛卯卜,侑于伊尹一羌一牢。","solution":"辛卯日卜问,行侑求之祭于旧老名臣伊尹,用一名羌奴,一对牛为献牲么?","messages":[{"content":"请将下面的文言文转为白话文:辛卯卜,侑于伊尹一羌一牢。","role":"user"},{"content":"辛卯日卜问,行侑求之祭于旧老名臣伊尹,用一名羌奴,一对牛为献牲么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:六牛。","solution":"用六牛为献么?
此片似不缺字,但“五五”不成辞例。从第(2)辞“六牛”看,第","messages":[{"content":"请将下面的文言文转为白话文:六牛。","role":"user"},{"content":"用六牛为献么?此片似不缺字,但“五五”不成辞例。从第(2)辞“六牛”看,第","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:庚申,贞酒方块御","solution":"庚申日占卜贞问行酒祭··御除灾殃之祭···","messages":[{"content":"请将下面的文言文转为白话文:庚申,贞酒方块御","role":"user"},{"content":"庚申日占卜贞问行酒祭··御除灾殃之祭···","role":"assistant"}]}]],"type_info":{"module":"distilabel.pipeline.batch","name":"_Batch"}}
steps_data/load_data_from_hub_0_04a0a6d2a56ec3ebeea8e75fb431e9f80fdc7ea6/batch_1.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seq_no":1,"step_name":"load_data_from_hub_0","last_batch":false,"data_hash":"e16694c3891d16c01bf6c4f6d41b96f263fef967","accumulated":false,"created_from":{},"batch_routed_to":[],"size":50,"data":[[{"problem":"请将下面的文言文转为白话文:贞祖乙孽王。","solution":"贞问先王祖乙会给商王造成孽害?","messages":[{"content":"请将下面的文言文转为白话文:贞祖乙孽王。","role":"user"},{"content":"贞问先王祖乙会给商王造成孽害?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:辛未侑于出日,兹不用』","solution":"辛未日侑求之祭,祀于出日典礼,兹卜不施行么?","messages":[{"content":"请将下面的文言文转为白话文:辛未侑于出日,兹不用』","role":"user"},{"content":"辛未日侑求之祭,祀于出日典礼,兹卜不施行么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:乙酉卜,于丁令马。","solution":"乙酉日占卜,在丁日命令马队之官么?","messages":[{"content":"请将下面的文言文转为白话文:乙酉卜,于丁令马。","role":"user"},{"content":"乙酉日占卜,在丁日命令马队之官么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:癸卯卜,㱿,贞【旬亡祸】。","solution":"五。三癸卯日占卜,贞人㱿问卦,贞问【下一个十天一旬内没有灾祸吧?】。商王武丁看了卜兆判断说:有灾祟之事·【其后残,全辞不可得知】。事后所记应验的结果是·【辞残】天气变坏,起了大风暴,当天晚上(辞残,应为“自某地监狱逃跑了”)羌奴五人。","messages":[{"content":"请将下面的文言文转为白话文:癸卯卜,㱿,贞【旬亡祸】。","role":"user"},{"content":"五。三癸卯日占卜,贞人㱿问卦,贞问【下一个十天一旬内没有灾祸吧?】。商王武丁看了卜兆判断说:有灾祟之事·【其后残,全辞不可得知】。事后所记应验的结果是·【辞残】天气变坏,起了大风暴,当天晚上(辞残,应为“自某地监狱逃跑了”)羌奴五人。","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:己丑卜,㱿。","solution":"己丑日卜问,贞人㱿问卦。","messages":[{"content":"请将下面的文言文转为白话文:己丑卜,㱿。","role":"user"},{"content":"己丑日卜问,贞人㱿问卦。","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:乙未,贞大御弜遘翌日,其兴。","solution":"乙未日卜问,大行御除灾殃之祭时,没有遇到翌日之祭,可以兴事行祭吧?","messages":[{"content":"请将下面的文言文转为白话文:乙未,贞大御弜遘翌日,其兴。","role":"user"},{"content":"乙未日卜问,大行御除灾殃之祭时,没有遇到翌日之祭,可以兴事行祭吧?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:允隻(获)鹿四百五十一。","solution":"果然捕获鹿四百五十一头。","messages":[{"content":"请将下面的文言文转为白话文:允隻(获)鹿四百五十一。","role":"user"},{"content":"果然捕获鹿四百五十一头。","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:贞勿惟王自飨。","solution":"贞问商王不独自飨宴么?","messages":[{"content":"请将下面的文言文转为白话文:贞勿惟王自飨。","role":"user"},{"content":"贞问商王不独自飨宴么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:方块方块卜,宾,贞令臿途𰅱。","solution":"某日占卜,贞人宾问卦,贞问命令贵族名臿者途及贵族𰅱么?","messages":[{"cont
ent":"请将下面的文言文转为白话文:方块方块卜,宾,贞令臿途𰅱。","role":"user"},{"content":"某日占卜,贞人宾问卦,贞问命令贵族名臿者途及贵族𰅱么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:癸卯,贞·豭九,下示幾【盟】","solution":"癸卯日贞问·公猪九头,自先王小乙始的下示𧎶杀取血衅祭么?","messages":[{"content":"请将下面的文言文转为白话文:癸卯,贞·豭九,下示幾【盟】","role":"user"},{"content":"癸卯日贞问·公猪九头,自先王小乙始的下示𧎶杀取血衅祭么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:贞于娀御。","solution":"贞问于娀神行御除灾殃之祭么?","messages":[{"content":"请将下面的文言文转为白话文:贞于娀御。","role":"user"},{"content":"贞问于娀神行御除灾殃之祭么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:贞···受黍年。","solution":"贞问·黍子会得到好年成么?","messages":[{"content":"请将下面的文言文转为白话文:贞···受黍年。","role":"user"},{"content":"贞问·黍子会得到好年成么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:贞亡降疾。","solution":"贞问没有降下疾疫吧?","messages":[{"content":"请将下面的文言文转为白话文:贞亡降疾。","role":"user"},{"content":"贞问没有降下疾疫吧?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:贞勿。","solution":"贞问不必(没有)了么?","messages":[{"content":"请将下面的文言文转为白话文:贞勿。","role":"user"},{"content":"贞问不必(没有)了么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:其祒妣甲祖辛奭,有正。","solution":"行祒(协、合)祭先王祖辛之配妣甲,有所祯祥么?","messages":[{"content":"请将下面的文言文转为白话文:其祒妣甲祖辛奭,有正。","role":"user"},{"content":"行祒(协、合)祭先王祖辛之配妣甲,有所祯祥么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:贞今甲午启。","solution":"贞问今天甲午日天气晴好么?","messages":[{"content":"请将下面的文言文转为白话文:贞今甲午启。","role":"user"},{"content":"贞问今天甲午日天气晴好么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:贞亡尤。在十月。","solution":"贞问没有灾忧之事发生?这是在十月占卜的。","messages":[{"content":"请将下面的文言文转为白话文:贞亡尤。在十月。","role":"user"},{"content":"贞问没有灾忧之事发生?这是在十月占卜的。","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:于女子。二","solution":"究竟祭不祭王室已故的男女贵族子弟们呢?","messages":[{"content":"请将下面的文言文转为白话文:于女子。二","role":"user"},{"content":"究竟祭不祭王室已故的男女贵族子弟们呢?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:庚辰卜,不雨。","solution":"庚辰日占卜,不下雨吧?","messages":[{"content":"请将下面的文言文转为白话文:庚辰卜,不雨。","role":"user"},{"content":"庚辰日占卜,不下雨吧?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:癸未,贞于木月延方。","solution":"癸未日卜问,在木月推延方祭?木月为商代月份之专名,但不知确指几月。","me
ssages":[{"content":"请将下面的文言文转为白话文:癸未,贞于木月延方。","role":"user"},{"content":"癸未日卜问,在木月推延方祭?木月为商代月份之专名,但不知确指几月。","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:贞于乙门令。","solution":"贞问在宗庙的乙门(专名)发布命令么?","messages":[{"content":"请将下面的文言文转为白话文:贞于乙门令。","role":"user"},{"content":"贞问在宗庙的乙门(专名)发布命令么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:贞戉其伐湔方𢦏。","solution":"贞问贵族名戉者率兵征伐湔方方国,使其受灾戕?(此处之灾,或有释为戉是否有灾者,但据下残“𢦏湔”,我们认为释为征伐湔方并使其灾戕为妥)。","messages":[{"content":"请将下面的文言文转为白话文:贞戉其伐湔方𢦏。","role":"user"},{"content":"贞问贵族名戉者率兵征伐湔方方国,使其受灾戕?(此处之灾,或有释为戉是否有灾者,但据下残“𢦏湔”,我们认为释为征伐湔方并使其灾戕为妥)。","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:乙未卜,行,贞王宾奏自上甲衣于后,亡尤。在十二月。","solution":"乙未日占卜,贞人行问卦,贞问商王行傧奏之祭,从先公上甲合祭至于多位先王,没有灾忧吧?这是在十二月占卜的。","messages":[{"content":"请将下面的文言文转为白话文:乙未卜,行,贞王宾奏自上甲衣于后,亡尤。在十二月。","role":"user"},{"content":"乙未日占卜,贞人行问卦,贞问商王行傧奏之祭,从先公上甲合祭至于多位先王,没有灾忧吧?这是在十二月占卜的。","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:贞我··","solution":"贞问我··么?","messages":[{"content":"请将下面的文言文转为白话文:贞我··","role":"user"},{"content":"贞问我··么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:贞不其受佑。","solution":"贞问不会受到保佑么?","messages":[{"content":"请将下面的文言文转为白话文:贞不其受佑。","role":"user"},{"content":"贞问不会受到保佑么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:于癸酉延雨。","solution":"在癸酉日雨会绵延不断么?","messages":[{"content":"请将下面的文言文转为白话文:于癸酉延雨。","role":"user"},{"content":"在癸酉日雨会绵延不断么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:驭于之若,王弗每。","solution":"来福吉于此若顺,商王不会晦气不吉���","messages":[{"content":"请将下面的文言文转为白话文:驭于之若,王弗每。","role":"user"},{"content":"来福吉于此若顺,商王不会晦气不吉?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:方块方块【卜】方块,贞于来己亥酒高妣己眔妣庚。","solution":"某日占卜,某贞人问卦【辞残,但从其贞问内容与(1)辞同推知,似亦应为贞人宾于同日卜卦𠁼,贞问于未来的己亥日酒祭先祖之配高妣己及先王之配妣庚么?","messages":[{"content":"请将下面的文言文转为白话文:方块方块【卜】方块,贞于来己亥酒高妣己眔妣庚。","role":"user"},{"content":"某日占卜,某贞人问卦【辞残,但从其贞问内容与(1)辞同推知,似亦应为贞人宾于同日卜卦𠁼,贞问于未来的己亥日酒祭先祖之配高妣己及先王之配妣庚么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:壬寅卜,丁伐彘。","solution":"壬寅日占卜,于名丁先王行杀伐之祭,以彘猪为献?","messages":[{"content":"请将下面的文言文转为白话文:壬寅卜,丁伐彘。",
"role":"user"},{"content":"壬寅日占卜,于名丁先王行杀伐之祭,以彘猪为献?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:卯卜,行,贞王宾𫩀,亡尤。","solution":"己卯日占卜,贞人行问卦,贞问商王傧于𫩀祭,没有灾忧之事发生吧?(6>方块方块卜,行【贞王】宾兄庚方块,【亡】尤。某日占卜,贞人行问卦,贞问(商王)傧于祭兄庚(即时王祖甲之兄祖庚)之仪,(没有发生)灾忧之事吧?时王祖甲祭其兄祖庚称“兄庚”,可确知此版为第二期祖甲时物。","messages":[{"content":"请将下面的文言文转为白话文:卯卜,行,贞王宾𫩀,亡尤。","role":"user"},{"content":"己卯日占卜,贞人行问卦,贞问商王傧于𫩀祭,没有灾忧之事发生吧?(6>方块方块卜,行【贞王】宾兄庚方块,【亡】尤。某日占卜,贞人行问卦,贞问(商王)傧于祭兄庚(即时王祖甲之兄祖庚)之仪,(没有发生)灾忧之事吧?时王祖甲祭其兄祖庚称“兄庚”,可确知此版为第二期祖甲时物。","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:癸【亥卜】禘【东】。","solution":"癸(亥日卜问),禘祭(东方之神)么?","messages":[{"content":"请将下面的文言文转为白话文:癸【亥卜】禘【东】。","role":"user"},{"content":"癸(亥日卜问),禘祭(东方之神)么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:亥有?","solution":"还是亥日有?","messages":[{"content":"请将下面的文言文转为白话文:亥有?","role":"user"},{"content":"还是亥日有?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:弜宾。","solution":"不用傧祭么?","messages":[{"content":"请将下面的文言文转为白话文:弜宾。","role":"user"},{"content":"不用傧祭么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:贞于敦。","solution":"贞问还是在敦这个地方?","messages":[{"content":"请将下面的文言文转为白话文:贞于敦。","role":"user"},{"content":"贞问还是在敦这个地方?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:贞翌甲寅其雨。","solution":"贞问未来的甲寅日下雨么?","messages":[{"content":"请将下面的文言文转为白话文:贞翌甲寅其雨。","role":"user"},{"content":"贞问未来的甲寅日下雨么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:【王】𰉏曰:其有酘。其隹庚吉。其隹·","solution":"商王武丁看了卜兆以后判断说:将有不祥之事发生。是庚日吉利呢?还是【某日吉利】(辞残,据前辞意拟补)?","messages":[{"content":"请将下面的文言文转为白话文:【王】𰉏曰:其有酘。其隹庚吉。其隹·","role":"user"},{"content":"商王武丁看了卜兆以后判断说:将有不祥之事发生。是庚日吉利呢?还是【某日吉利】(辞残,据前辞意拟补)?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:癸巳王卜贞,旬亡祸。王𰉏曰:吉。在十月又二甲午𫩻上甲,祭","solution":"大甲。癸巳日商壬卜问,未来的十大一旬之内没有灾祸之事发生吧?商王看了卜兆,又判断说:吉利!在十二月甲午这一天,用𫩻祭祀先王上甲,用祭祭祀先王大甲。","messages":[{"content":"请将下面的文言文转为白话文:癸巳王卜贞,旬亡祸。王𰉏曰:吉。在十月又二甲午𫩻上甲,祭","role":"user"},{"content":"大甲。癸巳日商壬卜问,未来的十大一旬之内没有灾祸之事发生吧?商王看了卜兆,又判断说:吉利!在十二月甲午这一天,用𫩻祭祀先王上甲,用祭祭祀先王大甲。","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:【贞】勿登五千。","solution":"贞问还是不征集五千兵众?此版为“相间刻辞”,上刻
“界划”五处。","messages":[{"content":"请将下面的文言文转为白话文:【贞】勿登五千。","role":"user"},{"content":"贞问还是不征集五千兵众?此版为“相间刻辞”,上刻“界划”五处。","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:于入自日廼𢼊。","solution":"是太阳落时才举行胣裂祭牲之祭?","messages":[{"content":"请将下面的文言文转为白话文:于入自日廼𢼊。","role":"user"},{"content":"是太阳落时才举行胣裂祭牲之祭?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:其自北来雨。","solution":"还是雨从北边过来呢?","messages":[{"content":"请将下面的文言文转为白话文:其自北来雨。","role":"user"},{"content":"还是雨从北边过来呢?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:叀竝𩧷亡灾。","solution":"竝地的𩧷马也没有灾祸之事发生吧?竝地在山西,古代以产名马著称,如古籍里所说“屈产之乘”。","messages":[{"content":"请将下面的文言文转为白话文:叀竝𩧷亡灾。","role":"user"},{"content":"竝地的𩧷马也没有灾祸之事发生吧?竝地在山西,古代以产名马著称,如古籍里所说“屈产之乘”。","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:王𰉏【曰】·","solution":"王看了卜兆以后·(其后字残,所说不可得知)。","messages":[{"content":"请将下面的文言文转为白话文:王𰉏【曰】·","role":"user"},{"content":"王看了卜兆以后·(其后字残,所说不可得知)。","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:翌辛卯不雨。","solution":"未来的辛卯日不下雨么?","messages":[{"content":"请将下面的文言文转为白话文:翌辛卯不雨。","role":"user"},{"content":"未来的辛卯日不下雨么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:弜宾。","solution":"不傧祭?","messages":[{"content":"请将下面的文言文转为白话文:弜宾。","role":"user"},{"content":"不傧祭?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:贞妇好有取不。","solution":"贞问妇好被娶了么?","messages":[{"content":"请将下面的文言文转为白话文:贞妇好有取不。","role":"user"},{"content":"贞问妇好被娶了么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:癸巳于岳。","solution":"癸巳日于先祖神岳行祭么?","messages":[{"content":"请将下面的文言文转为白话文:癸巳于岳。","role":"user"},{"content":"癸巳日于先祖神岳行祭么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:丁丑卜,尹,贞王宾中丁肜,亡尤。","solution":"丁丑日占卜,贞人尹问卦,贞问商王傧于肜祭先王中丁之仪,没有灾忧之事发生吧?","messages":[{"content":"请将下面的文言文转为白话文:丁丑卜,尹,贞王宾中丁肜,亡尤。","role":"user"},{"content":"丁丑日占卜,贞人尹问卦,贞问商王傧于肜祭先王中丁之仪,没有灾忧之事发生吧?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:贞易(赐)牛。","solution":"贞问赏赐牛么?","messages":[{"content":"请将下面的文言文转为白话文:贞易(赐)牛。","role":"user"},{"content":"贞问赏赐牛么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:于翌甲子步。","solution":"还是在未来
的甲子日行步脯之祭呢?","messages":[{"content":"请将下面的文言文转为白话文:于翌甲子步。","role":"user"},{"content":"还是在未来的甲子日行步脯之祭呢?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:在濜。","solution":"在濜地么?","messages":[{"content":"请将下面的文言文转为白话文:在濜。","role":"user"},{"content":"在濜地么?","role":"assistant"}]}]],"type_info":{"module":"distilabel.pipeline.batch","name":"_Batch"}}
steps_data/load_data_from_hub_0_04a0a6d2a56ec3ebeea8e75fb431e9f80fdc7ea6/batch_2.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seq_no":2,"step_name":"load_data_from_hub_0","last_batch":false,"data_hash":"122d08c31c02ee6d6e9ebb7f1b4554299ca46c69","accumulated":false,"created_from":{},"batch_routed_to":[],"size":50,"data":[[{"problem":"请将下面的文言文转为白话文:··𫳅。","solution":"一对羊。","messages":[{"content":"请将下面的文言文转为白话文:··𫳅。","role":"user"},{"content":"一对羊。","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:用。","solution":"此卜行用么?","messages":[{"content":"请将下面的文言文转为白话文:用。","role":"user"},{"content":"此卜行用么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:呼伐𢀛方。","solution":"命令征伐𢀛方方国么?","messages":[{"content":"请将下面的文言文转为白话文:呼伐𢀛方。","role":"user"},{"content":"命令征伐𢀛方方国么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:庚子卜,宾,贞梦白牛,隹祸。","solution":"庚子日占卜,贞人宾问卦,贞问梦中见到了白牛,不会出现灾祸之事吧?","messages":[{"content":"请将下面的文言文转为白话文:庚子卜,宾,贞梦白牛,隹祸。","role":"user"},{"content":"庚子日占卜,贞人宾问卦,贞问梦中见到了白牛,不会出现灾祸之事吧?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:戊子卜,师,侑母壬。","solution":"戊子日占卜,贞人师问卦,行侑求之祭于母壬么?","messages":[{"content":"请将下面的文言文转为白话文:戊子卜,师,侑母壬。","role":"user"},{"content":"戊子日占卜,贞人师问卦,行侑求之祭于母壬么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:庚辰卜,㱿,贞侑于丁五𫳅。","solution":"庚辰日占卜,贞人㱿问卦,贞问行侑求之祭于丁名(商王名丁者八人·此无庙号及区别字,不能确指何王)的先王,以五对羊为献牲么?","messages":[{"content":"请将下面的文言文转为白话文:庚辰卜,㱿,贞侑于丁五𫳅。","role":"user"},{"content":"庚辰日占卜,贞人㱿问卦,贞问行侑求之祭于丁名(商王名丁者八人·此无庙号及区别字,不能确指何王)的先王,以五对羊为献牲么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:贞··酒三","solution":"贞问··酒祭三·么?","messages":[{"content":"请将下面的文言文转为白话文:贞··酒三","role":"user"},{"content":"贞问··酒祭三·么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:东土受年。","solution":"东方的国土会得到好年成吧?","messages":[{"content":"请将下面的文言文转为白话文:东土受年。","role":"user"},{"content":"东方的国土会得到好年成吧?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:辛卯卜,㱿。","solution":"辛卯日占卜,贞人㱿问卦。","messages":[{"content":"请将下面的文言文转为白话文:辛卯卜,㱿。","role":"user"},{"content":"辛卯日占卜,贞人㱿问卦。","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:亡灾。","solution":"没有灾祸吧?","messages":[{"content":"请将下面的文言文转为白话文:亡灾。","role":"user"},{"content":"没有灾祸吧?","role":
"assistant"}]},{"problem":"请将下面的文言文转为白话文:其八朋。","solution":"八朋贝么?","messages":[{"content":"请将下面的文言文转为白话文:其八朋。","role":"user"},{"content":"八朋贝么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:不遘小雨。","solution":"不会遇到小雨吧?","messages":[{"content":"请将下面的文言文转为白话文:不遘小雨。","role":"user"},{"content":"不会遇到小雨吧?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:叀𰅵令省𧇴。","solution":"是命令贵族名𰅵者去省视仓廪么?","messages":[{"content":"请将下面的文言文转为白话文:叀𰅵令省𧇴。","role":"user"},{"content":"是命令贵族名𰅵者去省视仓廪么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:甲寅卜,亘,呼犬登执豕,幸。","solution":"甲寅日占卜,贞人亘问卦,命令犬官名登者执擒野豕,能幸执捉到么?","messages":[{"content":"请将下面的文言文转为白话文:甲寅卜,亘,呼犬登执豕,幸。","role":"user"},{"content":"甲寅日占卜,贞人亘问卦,命令犬官名登者执擒野豕,能幸执捉到么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:右戍不雉众。吉","solution":"右边的戍队不伤损众(自由民)么?此卜吉利。","messages":[{"content":"请将下面的文言文转为白话文:右戍不雉众。吉","role":"user"},{"content":"右边的戍队不伤损众(自由民)么?此卜吉利。","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:贞于西母酒褅。","solution":"贞问行酒祭、褅祭于西母(司生育之神)么?","messages":[{"content":"请将下面的文言文转为白话文:贞于西母酒褅。","role":"user"},{"content":"贞问行酒祭、褅祭于西母(司生育之神)么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:·于宗门寻王羌。","solution":"于宗庙门处行寻祭,以商王的羌奴为献牲么?","messages":[{"content":"请将下面的文言文转为白话文:·于宗门寻王羌。","role":"user"},{"content":"于宗庙门处行寻祭,以商王的羌奴为献牲么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:癸丑卜,王,贞旬亡祸。在四月甲寅酒翌自上甲。","solution":"癸丑日占卜,商王问卦,贞问下一个十天一旬之内没有灾祸之事发生吧在四月甲寅日行酒祭翌祭自先公上甲王开始,祭祀各王。","messages":[{"content":"请将下面的文言文转为白话文:癸丑卜,王,贞旬亡祸。在四月甲寅酒翌自上甲。","role":"user"},{"content":"癸丑日占卜,商王问卦,贞问下一个十天一旬之内没有灾祸之事发生吧在四月甲寅日行酒祭翌祭自先公上甲王开始,祭祀各王。","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:乙酉卜,尹,贞王宾祖乙肜,亡尤。","solution":"乙酉日占卜,贞人尹问卦,贞问商王傧于肜祭先王祖乙,没有灾忧吧?","messages":[{"content":"请将下面的文言文转为白话文:乙酉卜,尹,贞王宾祖乙肜,亡尤。","role":"user"},{"content":"乙酉日占卜,贞人尹问卦,贞问商王傧于肜祭先王祖乙,没有灾忧吧?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:贞侑于𭭟。","solution":"贞问还是行侑求之祭于𭭟神?","messages":[{"content":"请将下面的文言文转为白话文:贞侑于𭭟。","role":"user"},{"content":"贞问还是行侑求之祭于𭭟神?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:丁
卯,𰇉有疾。","solution":"丁卯日商王王子名𰇉(子𰇉为商王武丁之子,见《甲骨文合集》296、381、3076、30等第卄期武丁时甲骨,其寿命当较长,在第亠期祖庚、祖甲时当为其同辈,但出组卜辞中常见其“有疾”之问,或已年纪老迈,体质每况愈下)者有疾病了么?","messages":[{"content":"请将下面的文言文转为白话文:丁卯,𰇉有疾。","role":"user"},{"content":"丁卯日商王王子名𰇉(子𰇉为商王武丁之子,见《甲骨文合集》296、381、3076、30等第卄期武丁时甲骨,其寿命当较长,在第亠期祖庚、祖甲时当为其同辈,但出组卜辞中常见其“有疾”之问,或已年纪老迈,体质每况愈下)者有疾病了么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:乙丑,贞王令·","solution":"乙丑日卜问,商王命令·么?","messages":[{"content":"请将下面的文言文转为白话文:乙丑,贞王令·","role":"user"},{"content":"乙丑日卜问,商王命令·么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:庚辰,贞日有戠,非祸,隹若。","solution":"庚辰日贞问,太阳出现了黑子,没有祸事,是若顺吉利吧","messages":[{"content":"请将下面的文言文转为白话文:庚辰,贞日有戠,非祸,隹若。","role":"user"},{"content":"庚辰日贞问,太阳出现了黑子,没有祸事,是若顺吉利吧","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:辛巳[卜],王,侑祖","solution":"辛巳日占卜,商王问卦,行侑求之祭于先王祖·么?","messages":[{"content":"请将下面的文言文转为白话文:辛巳[卜],王,侑祖","role":"user"},{"content":"辛巳日占卜,商王问卦,行侑求之祭于先王祖·么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:庚戌卜,尹,贞王宾小乙奭妣庚,亡【尤】。","solution":"庚戌日占卜,贞人尹问卦,贞问商王傧祭先王小乙之配名妣庚者没有灾忧之事发生吧?","messages":[{"content":"请将下面的文言文转为白话文:庚戌卜,尹,贞王宾小乙奭妣庚,亡【尤】。","role":"user"},{"content":"庚戌日占卜,贞人尹问卦,贞问商王傧祭先王小乙之配名妣庚者没有灾忧之事发生吧?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:癸丑,贞其大御,叀甲子酒。","solution":"癸丑日贞问,大行御除灾殃之祭,是在甲子日举行酒祭么?","messages":[{"content":"请将下面的文言文转为白话文:癸丑,贞其大御,叀甲子酒。","role":"user"},{"content":"癸丑日贞问,大行御除灾殃之祭,是在甲子日举行酒祭么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:王往狩。","solution":"商王去狩猎吧?","messages":[{"content":"请将下面的文言文转为白话文:王往狩。","role":"user"},{"content":"商王去狩猎吧?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:己卯卜,允,贞令多子族从犬侯寇周,叶王事。𣥄月。","solution":"己卯日占卜,贞人允问卦,贞问命令商王诸子辈的族军率领贵族犬侯之军去征讨周方国,勤劳王事么?这是五月占卜的。","messages":[{"content":"请将下面的文言文转为白话文:己卯卜,允,贞令多子族从犬侯寇周,叶王事。𣥄月。","role":"user"},{"content":"己卯日占卜,贞人允问卦,贞问命令商王诸子辈的族军率领贵族犬侯之军去征讨周方国,勤劳王事么?这是五月占卜的。","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:弗幸。","solution":"没有幸捉住么?","messages":[{"content":"请将下面的文言文转为白话文:弗幸。","role":"user"},{"content":"没有幸捉住么?","role":"assistant"}]},{"problem":"请将下面
的文言文转为白话文:癸未卜,有祸百工。","solution":"癸未日卜问,有祸事于百工的工官么?","messages":[{"content":"请将下面的文言文转为白话文:癸未卜,有祸百工。","role":"user"},{"content":"癸未日卜问,有祸事于百工的工官么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:方既食,戍廼伐,𢦏。","solution":"方国已经食时(中午),戍官就去征伐,能对其有所𢦏伤么?","messages":[{"content":"请将下面的文言文转为白话文:方既食,戍廼伐,𢦏。","role":"user"},{"content":"方国已经食时(中午),戍官就去征伐,能对其有所𢦏伤么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:弗及今夕雨。","solution":"还是不到今天夜里下雨?","messages":[{"content":"请将下面的文言文转为白话文:弗及今夕雨。","role":"user"},{"content":"还是不到今天夜里下雨?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:大。酉。","solution":"意不明。","messages":[{"content":"请将下面的文言文转为白话文:大。酉。","role":"user"},{"content":"意不明。","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:贞亡尤。在十月。","solution":"贞问没有灾忧之事发生?这是在十月占卜的。","messages":[{"content":"请将下面的文言文转为白话文:贞亡尤。在十月。","role":"user"},{"content":"贞问没有灾忧之事发生?这是在十月占卜的。","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:辛亥,贞侑·","solution":"辛亥日贞问,行侑求之祭","messages":[{"content":"请将下面的文言文转为白话文:辛亥,贞侑·","role":"user"},{"content":"辛亥日贞问,行侑求之祭","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:癸亥卜,宾,贞旬亡祸。五月。","solution":"癸亥日占卜,贞人宾问卦,贞问未来的十天一旬之内没有灾祸之事发生吧?这是五月占卜的。","messages":[{"content":"请将下面的文言文转为白话文:癸亥卜,宾,贞旬亡祸。五月。","role":"user"},{"content":"癸亥日占卜,贞人宾问卦,贞问未来的十天一旬之内没有灾祸之事发生吧?这是五月占卜的。","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:𩎴风叀豚,有大雨。","solution":"祭祀𩎴风(西方风名)用小猪为献,会有大雨吧?","messages":[{"content":"请将下面的文言文转为白话文:𩎴风叀豚,有大雨。","role":"user"},{"content":"祭祀𩎴风(西方风名)用小猪为献,会有大雨吧?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:甲辰,㱿,贞翌乙巳侑于父乙𫳅,用。","solution":"甲辰日占卜,贞人㱿问卦,贞问未来的乙巳日行侑求之祭于先王父乙(即武丁之父小乙),以一对羊为祭牲么?","messages":[{"content":"请将下面的文言文转为白话文:甲辰,㱿,贞翌乙巳侑于父乙𫳅,用。","role":"user"},{"content":"甲辰日占卜,贞人㱿问卦,贞问未来的乙巳日行侑求之祭于先王父乙(即武丁之父小乙),以一对羊为祭牲么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:癸未卜,贞至蜀亡祸。","solution":"癸未日占卜,贞问至于蜀地没有灾祸吧?","messages":[{"content":"请将下面的文言文转为白话文:癸未卜,贞至蜀亡祸。","role":"user"},{"content":"癸未日占卜,贞问至于蜀地没有灾祸吧?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:贞勿呼伐。","solution":"贞问还是不命令征伐(𢀛方方国)
么?","messages":[{"content":"请将下面的文言文转为白话文:贞勿呼伐。","role":"user"},{"content":"贞问还是不命令征伐(𢀛方方国)么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:竝由。","solution":"贵族竝有由祸么?","messages":[{"content":"请将下面的文言文转为白话文:竝由。","role":"user"},{"content":"贵族竝有由祸么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:癸巳卜,其呼戍·","solution":"癸巳日占卜,命令戍官(守卫)","messages":[{"content":"请将下面的文言文转为白话文:癸巳卜,其呼戍·","role":"user"},{"content":"癸巳日占卜,命令戍官(守卫)","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:癸未卜,父甲杏物牛。兹用","solution":"癸未日占卜,商王先父祖甲(廪辛之父甲)行杏祭,以杂色的牛为献牲么?此卜施行了。","messages":[{"content":"请将下面的文言文转为白话文:癸未卜,父甲杏物牛。兹用","role":"user"},{"content":"癸未日占卜,商王先父祖甲(廪辛之父甲)行杏祭,以杂色的牛为献牲么?此卜施行了。","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:甲子,贞大邑有入在。","solution":"甲子日问卦,大邑商都有所贡入,在方块(字不识)地么?","messages":[{"content":"请将下面的文言文转为白话文:甲子,贞大邑有入在。","role":"user"},{"content":"甲子日问卦,大邑商都有所贡入,在方块(字不识)地么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:丙申卜,叀兹戣用于河。","solution":"丙申日占卜,是将此戣(铜兵器)用于祭先祖神河么?","messages":[{"content":"请将下面的文言文转为白话文:丙申卜,叀兹戣用于河。","role":"user"},{"content":"丙申日占卜,是将此戣(铜兵器)用于祭先祖神河么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:戠釐,雨。兹用。","solution":"用戠牛为祭祀福肉,会下雨?此卜施行了。","messages":[{"content":"请将下面的文言文转为白话文:戠釐,雨。兹用。","role":"user"},{"content":"用戠牛为祭祀福肉,会下雨?此卜施行了。","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:庚辰卜,方不飨。","solution":"庚辰日占卜,方方国不飨食?","messages":[{"content":"请将下面的文言文转为白话文:庚辰卜,方不飨。","role":"user"},{"content":"庚辰日占卜,方方国不飨食?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:贞疾趾于妣庚御。","solution":"贞问有了脚疾,行御除灾殃之祭于先妣名妣庚者么?","messages":[{"content":"请将下面的文言文转为白话文:贞疾趾于妣庚御。","role":"user"},{"content":"贞问有了脚疾,行御除灾殃之祭于先妣名妣庚者么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:庚寅,贞王其征北方。","solution":"庚寅日卜问,商王伐征北方方国么?","messages":[{"content":"请将下面的文言文转为白话文:庚寅,贞王其征北方。","role":"user"},{"content":"庚寅日卜问,商王伐征北方方国么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:丁亥,贞【王】令冓【取】方块方。","solution":"丁亥日贞问,商壬命令贵族名冓者取聚于某方国么?","messages":[{"content":"请将下面的文言文转为白话文:丁亥,贞【王】令冓【取】方块方。","role":"user"},{"content":"丁亥
日贞问,商壬命令贵族名冓者取聚于某方国么?","role":"assistant"}]}]],"type_info":{"module":"distilabel.pipeline.batch","name":"_Batch"}}
steps_data/load_data_from_hub_0_88f19108c6d8fd87f113fbd9b85f4cc1add05dbc/batch_0.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seq_no":0,"step_name":"load_data_from_hub_0","last_batch":true,"data_hash":"28999ffa2f36c674b3d1a3a4399f16cfa50e8060","accumulated":false,"created_from":{},"batch_routed_to":[],"size":10,"data":[[{"problem":"请将下面的文言文转为白话文:五云酒。","solution":"··对五云之神行酒祭么?","messages":[{"content":"请将下面的文言文转为白话文:五云酒。","role":"user"},{"content":"··对五云之神行酒祭么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:癸卯王卜贞,旬亡祸。在十月王𰉏曰:大吉。","solution":"癸卯日商王卜问,在未来的十天一旬之内没有灾祸之事发生吧?在十月商王看了卜兆,又判断说:大大的吉利!","messages":[{"content":"请将下面的文言文转为白话文:癸卯王卜贞,旬亡祸。在十月王𰉏曰:大吉。","role":"user"},{"content":"癸卯日商王卜问,在未来的十天一旬之内没有灾祸之事发生吧?在十月商王看了卜兆,又判断说:大大的吉利!","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:弜侑出人日。","solution":"不以侑求之祭祀出日、人日么?","messages":[{"content":"请将下面的文言文转为白话文:弜侑出人日。","role":"user"},{"content":"不以侑求之祭祀出日、人日么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:方块丑卜,亘,贞其延雨。","solution":"某丑日占卜,贞人亘问卦,贞问雨会绵延不停么?","messages":[{"content":"请将下面的文言文转为白话文:方块丑卜,亘,贞其延雨。","role":"user"},{"content":"某丑日占卜,贞人亘问卦,贞问雨会绵延不停么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:贞于母己御。","solution":"贞问于母己行御除灾殃之祭吧?","messages":[{"content":"请将下面的文言文转为白话文:贞于母己御。","role":"user"},{"content":"贞问于母己行御除灾殃之祭吧?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:方块方块【卜】,方块,贞大示十牛。九月。","solution":"贞问大示先王献十头牛为祭?这是九月占卜的。","messages":[{"content":"请将下面的文言文转为白话文:方块方块【卜】,方块,贞大示十牛。九月。","role":"user"},{"content":"贞问大示先王献十头牛为祭?这是九月占卜的。","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:贞𤰔多臣呼从沚𪭑。","solution":"贞问命令是沚𪭑率领众多臣奴么?","messages":[{"content":"请将下面的文言文转为白话文:贞𤰔多臣呼从沚𪭑。","role":"user"},{"content":"贞问命令是沚𪭑率领众多臣奴么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:戊申卜,㱿,贞若。","solution":"戊申日占卜,贞人㱿问卦,贞问一切会顺利么?","messages":[{"content":"请将下面的文言文转为白话文:戊申卜,㱿,贞若。","role":"user"},{"content":"戊申日占卜,贞人㱿问卦,贞问一切会顺利么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:贞王勿从沚𪭑。","solution":"贞问是王不率领沚𪭑么?","messages":[{"content":"请将下面的文言文转为白话文:贞王勿从沚𪭑。","role":"user"},{"content":"贞问是王不率领沚𪭑么?","role":"assistant"}]},{"problem":"请将下面的文言文转为白话文:··用,十祀。","solution":"·用掉了,是商王十年?"
,"messages":[{"content":"请将下面的文言文转为白话文:··用,十祀。","role":"user"},{"content":"·用掉了,是商王十年?","role":"assistant"}]}]],"type_info":{"module":"distilabel.pipeline.batch","name":"_Batch"}}
steps_data/text_generation_0_0873d6dc3a6f7216415c198a6cfb1587f29dc1ff/batch_0.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4256422650d141f228fe954acee98679da412984c29a569877eefd3af69315a
3
+ size 11422959
tokenizer_config.json ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": null,
5
+ "added_tokens_decoder": {
6
+ "151643": {
7
+ "content": "<|end▁of▁sentence|>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "151644": {
15
+ "content": "<|User|>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": false
21
+ },
22
+ "151645": {
23
+ "content": "<|Assistant|>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": false
29
+ },
30
+ "151646": {
31
+ "content": "<|begin▁of▁sentence|>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false,
36
+ "special": true
37
+ },
38
+ "151647": {
39
+ "content": "<|EOT|>",
40
+ "lstrip": false,
41
+ "normalized": false,
42
+ "rstrip": false,
43
+ "single_word": false,
44
+ "special": false
45
+ },
46
+ "151648": {
47
+ "content": "<think>",
48
+ "lstrip": false,
49
+ "normalized": false,
50
+ "rstrip": false,
51
+ "single_word": false,
52
+ "special": false
53
+ },
54
+ "151649": {
55
+ "content": "</think>",
56
+ "lstrip": false,
57
+ "normalized": false,
58
+ "rstrip": false,
59
+ "single_word": false,
60
+ "special": false
61
+ },
62
+ "151650": {
63
+ "content": "<|quad_start|>",
64
+ "lstrip": false,
65
+ "normalized": false,
66
+ "rstrip": false,
67
+ "single_word": false,
68
+ "special": true
69
+ },
70
+ "151651": {
71
+ "content": "<|quad_end|>",
72
+ "lstrip": false,
73
+ "normalized": false,
74
+ "rstrip": false,
75
+ "single_word": false,
76
+ "special": true
77
+ },
78
+ "151652": {
79
+ "content": "<|vision_start|>",
80
+ "lstrip": false,
81
+ "normalized": false,
82
+ "rstrip": false,
83
+ "single_word": false,
84
+ "special": true
85
+ },
86
+ "151653": {
87
+ "content": "<|vision_end|>",
88
+ "lstrip": false,
89
+ "normalized": false,
90
+ "rstrip": false,
91
+ "single_word": false,
92
+ "special": true
93
+ },
94
+ "151654": {
95
+ "content": "<|vision_pad|>",
96
+ "lstrip": false,
97
+ "normalized": false,
98
+ "rstrip": false,
99
+ "single_word": false,
100
+ "special": true
101
+ },
102
+ "151655": {
103
+ "content": "<|image_pad|>",
104
+ "lstrip": false,
105
+ "normalized": false,
106
+ "rstrip": false,
107
+ "single_word": false,
108
+ "special": true
109
+ },
110
+ "151656": {
111
+ "content": "<|video_pad|>",
112
+ "lstrip": false,
113
+ "normalized": false,
114
+ "rstrip": false,
115
+ "single_word": false,
116
+ "special": true
117
+ },
118
+ "151657": {
119
+ "content": "<tool_call>",
120
+ "lstrip": false,
121
+ "normalized": false,
122
+ "rstrip": false,
123
+ "single_word": false,
124
+ "special": false
125
+ },
126
+ "151658": {
127
+ "content": "</tool_call>",
128
+ "lstrip": false,
129
+ "normalized": false,
130
+ "rstrip": false,
131
+ "single_word": false,
132
+ "special": false
133
+ },
134
+ "151659": {
135
+ "content": "<|fim_prefix|>",
136
+ "lstrip": false,
137
+ "normalized": false,
138
+ "rstrip": false,
139
+ "single_word": false,
140
+ "special": false
141
+ },
142
+ "151660": {
143
+ "content": "<|fim_middle|>",
144
+ "lstrip": false,
145
+ "normalized": false,
146
+ "rstrip": false,
147
+ "single_word": false,
148
+ "special": false
149
+ },
150
+ "151661": {
151
+ "content": "<|fim_suffix|>",
152
+ "lstrip": false,
153
+ "normalized": false,
154
+ "rstrip": false,
155
+ "single_word": false,
156
+ "special": false
157
+ },
158
+ "151662": {
159
+ "content": "<|fim_pad|>",
160
+ "lstrip": false,
161
+ "normalized": false,
162
+ "rstrip": false,
163
+ "single_word": false,
164
+ "special": false
165
+ },
166
+ "151663": {
167
+ "content": "<|repo_name|>",
168
+ "lstrip": false,
169
+ "normalized": false,
170
+ "rstrip": false,
171
+ "single_word": false,
172
+ "special": false
173
+ },
174
+ "151664": {
175
+ "content": "<|file_sep|>",
176
+ "lstrip": false,
177
+ "normalized": false,
178
+ "rstrip": false,
179
+ "single_word": false,
180
+ "special": false
181
+ }
182
+ },
183
+ "bos_token": "<|begin▁of▁sentence|>",
184
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool 
%}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}",
185
+ "clean_up_tokenization_spaces": false,
186
+ "eos_token": "<|end▁of▁sentence|>",
187
+ "extra_special_tokens": {},
188
+ "legacy": true,
189
+ "model_max_length": 16384,
190
+ "pad_token": "<|end▁of▁sentence|>",
191
+ "padding_side": "left",
192
+ "sp_model_kwargs": {},
193
+ "tokenizer_class": "LlamaTokenizerFast",
194
+ "unk_token": null,
195
+ "use_default_system_prompt": false
196
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "total_flos": 0.0,
3
+ "train_loss": 0.036479350634126606,
4
+ "train_runtime": 34004.8654,
5
+ "train_samples": 1300,
6
+ "train_samples_per_second": 1.529,
7
+ "train_steps_per_second": 0.006
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,590 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 33.390243902439025,
5
+ "eval_steps": 100,
6
+ "global_step": 200,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "completion_length": 393.713187789917,
13
+ "epoch": 0.975609756097561,
14
+ "grad_norm": 0.2263876348733902,
15
+ "kl": 0.00019991397857666016,
16
+ "learning_rate": 5e-06,
17
+ "loss": 0.0,
18
+ "reward": 0.3574329398572445,
19
+ "reward_std": 0.317268418520689,
20
+ "rewards/accuracy_reward_word": 0.3574329435825348,
21
+ "rewards/format_reward": 0.0,
22
+ "step": 5
23
+ },
24
+ {
25
+ "completion_length": 359.7960117224491,
26
+ "epoch": 1.7804878048780488,
27
+ "grad_norm": 0.4675009250640869,
28
+ "kl": 0.029177347819010418,
29
+ "learning_rate": 1e-05,
30
+ "loss": 0.001,
31
+ "reward": 0.4508711948539271,
32
+ "reward_std": 0.29290521957657556,
33
+ "rewards/accuracy_reward_word": 0.45087120072408154,
34
+ "rewards/format_reward": 0.0,
35
+ "step": 10
36
+ },
37
+ {
38
+ "completion_length": 302.4170865145597,
39
+ "epoch": 2.5853658536585367,
40
+ "grad_norm": 1.2330620288848877,
41
+ "kl": 0.06524658203125,
42
+ "learning_rate": 1.5000000000000002e-05,
43
+ "loss": 0.0022,
44
+ "reward": 0.5415759321415063,
45
+ "reward_std": 0.24065017248644974,
46
+ "rewards/accuracy_reward_word": 0.5415759276260029,
47
+ "rewards/format_reward": 0.0,
48
+ "step": 15
49
+ },
50
+ {
51
+ "completion_length": 272.7953320127545,
52
+ "epoch": 3.3902439024390243,
53
+ "grad_norm": 0.30344322323799133,
54
+ "kl": 0.09641335227272728,
55
+ "learning_rate": 2e-05,
56
+ "loss": 0.0032,
57
+ "reward": 0.5741085495912668,
58
+ "reward_std": 0.2243843080871033,
59
+ "rewards/accuracy_reward_word": 0.5741085464304144,
60
+ "rewards/format_reward": 0.0,
61
+ "step": 20
62
+ },
63
+ {
64
+ "completion_length": 281.6178417783795,
65
+ "epoch": 4.195121951219512,
66
+ "grad_norm": 0.3025609254837036,
67
+ "kl": 0.16150272253787878,
68
+ "learning_rate": 1.9961946980917457e-05,
69
+ "loss": 0.0053,
70
+ "reward": 0.5691162316185056,
71
+ "reward_std": 0.24541335485198282,
72
+ "rewards/accuracy_reward_word": 0.569116218975096,
73
+ "rewards/format_reward": 0.0,
74
+ "step": 25
75
+ },
76
+ {
77
+ "completion_length": 292.8073728156812,
78
+ "epoch": 5.0,
79
+ "grad_norm": 0.8864567279815674,
80
+ "kl": 0.36754261363636365,
81
+ "learning_rate": 1.9848077530122083e-05,
82
+ "loss": 0.0122,
83
+ "reward": 0.6015709214138262,
84
+ "reward_std": 0.24950947499636447,
85
+ "rewards/accuracy_reward_word": 0.6015709196076249,
86
+ "rewards/format_reward": 0.0,
87
+ "step": 30
88
+ },
89
+ {
90
+ "completion_length": 175.47266426086426,
91
+ "epoch": 5.975609756097561,
92
+ "grad_norm": 0.7226300835609436,
93
+ "kl": 0.44776611328125,
94
+ "learning_rate": 1.9659258262890683e-05,
95
+ "loss": 0.0179,
96
+ "reward": 0.6118914943188429,
97
+ "reward_std": 0.1827767850831151,
98
+ "rewards/accuracy_reward_word": 0.6118914864957332,
99
+ "rewards/format_reward": 0.0,
100
+ "step": 35
101
+ },
102
+ {
103
+ "completion_length": 131.66680838844994,
104
+ "epoch": 6.780487804878049,
105
+ "grad_norm": 18.362396240234375,
106
+ "kl": 0.5860558712121212,
107
+ "learning_rate": 1.9396926207859085e-05,
108
+ "loss": 0.0194,
109
+ "reward": 0.6025556867772882,
110
+ "reward_std": 0.16030028168902252,
111
+ "rewards/accuracy_reward_word": 0.6025556894865903,
112
+ "rewards/format_reward": 0.0,
113
+ "step": 40
114
+ },
115
+ {
116
+ "completion_length": 166.34443363998875,
117
+ "epoch": 7.585365853658536,
118
+ "grad_norm": 0.8004411458969116,
119
+ "kl": 0.518850615530303,
120
+ "learning_rate": 1.9063077870366504e-05,
121
+ "loss": 0.0171,
122
+ "reward": 0.6216023144396868,
123
+ "reward_std": 0.20047903162511913,
124
+ "rewards/accuracy_reward_word": 0.6216023090210828,
125
+ "rewards/format_reward": 0.0,
126
+ "step": 45
127
+ },
128
+ {
129
+ "completion_length": 264.6024202289003,
130
+ "epoch": 8.390243902439025,
131
+ "grad_norm": 0.4066580832004547,
132
+ "kl": 0.6091086647727273,
133
+ "learning_rate": 1.866025403784439e-05,
134
+ "loss": 0.0201,
135
+ "reward": 0.6114482121034102,
136
+ "reward_std": 0.2318851254654653,
137
+ "rewards/accuracy_reward_word": 0.6114482130065109,
138
+ "rewards/format_reward": 0.0,
139
+ "step": 50
140
+ },
141
+ {
142
+ "completion_length": 190.6026874889027,
143
+ "epoch": 9.195121951219512,
144
+ "grad_norm": 0.17414061725139618,
145
+ "kl": 0.6156486742424242,
146
+ "learning_rate": 1.819152044288992e-05,
147
+ "loss": 0.0203,
148
+ "reward": 0.6352746134454553,
149
+ "reward_std": 0.17895382462125836,
150
+ "rewards/accuracy_reward_word": 0.6352746274435159,
151
+ "rewards/format_reward": 0.0,
152
+ "step": 55
153
+ },
154
+ {
155
+ "completion_length": 203.3459234526663,
156
+ "epoch": 10.0,
157
+ "grad_norm": 0.20065073668956757,
158
+ "kl": 0.6055279356060606,
159
+ "learning_rate": 1.766044443118978e-05,
160
+ "loss": 0.02,
161
+ "reward": 0.6350559790929159,
162
+ "reward_std": 0.17616610262881627,
163
+ "rewards/accuracy_reward_word": 0.6350559863177213,
164
+ "rewards/format_reward": 0.0,
165
+ "step": 60
166
+ },
167
+ {
168
+ "completion_length": 185.47132530212403,
169
+ "epoch": 10.975609756097562,
170
+ "grad_norm": 0.14110271632671356,
171
+ "kl": 0.5395751953125,
172
+ "learning_rate": 1.7071067811865477e-05,
173
+ "loss": 0.0216,
174
+ "reward": 0.6301242753863334,
175
+ "reward_std": 0.16279728673398494,
176
+ "rewards/accuracy_reward_word": 0.6301242724061012,
177
+ "rewards/format_reward": 0.0,
178
+ "step": 65
179
+ },
180
+ {
181
+ "completion_length": 151.86269656094638,
182
+ "epoch": 11.78048780487805,
183
+ "grad_norm": 0.15463927388191223,
184
+ "kl": 0.5140269886363636,
185
+ "learning_rate": 1.6427876096865394e-05,
186
+ "loss": 0.017,
187
+ "reward": 0.6122245905977307,
188
+ "reward_std": 0.13943401035485845,
189
+ "rewards/accuracy_reward_word": 0.61222458969463,
190
+ "rewards/format_reward": 0.0,
191
+ "step": 70
192
+ },
193
+ {
194
+ "completion_length": 178.9215455488725,
195
+ "epoch": 12.585365853658537,
196
+ "grad_norm": 0.18595437705516815,
197
+ "kl": 0.42743844696969696,
198
+ "learning_rate": 1.573576436351046e-05,
199
+ "loss": 0.0141,
200
+ "reward": 0.6210114558537801,
201
+ "reward_std": 0.14690511865597783,
202
+ "rewards/accuracy_reward_word": 0.6210114535960284,
203
+ "rewards/format_reward": 0.0,
204
+ "step": 75
205
+ },
206
+ {
207
+ "completion_length": 198.75474432742956,
208
+ "epoch": 13.390243902439025,
209
+ "grad_norm": 0.14770109951496124,
210
+ "kl": 0.3772194602272727,
211
+ "learning_rate": 1.5000000000000002e-05,
212
+ "loss": 0.0125,
213
+ "reward": 0.6053550211769162,
214
+ "reward_std": 0.15184391656834068,
215
+ "rewards/accuracy_reward_word": 0.6053550184676142,
216
+ "rewards/format_reward": 0.0,
217
+ "step": 80
218
+ },
219
+ {
220
+ "completion_length": 184.9996028090968,
221
+ "epoch": 14.195121951219512,
222
+ "grad_norm": 0.13854625821113586,
223
+ "kl": 0.3505267518939394,
224
+ "learning_rate": 1.4226182617406996e-05,
225
+ "loss": 0.0116,
226
+ "reward": 0.6068314526117209,
227
+ "reward_std": 0.1331417437529925,
228
+ "rewards/accuracy_reward_word": 0.6068314548694727,
229
+ "rewards/format_reward": 0.0,
230
+ "step": 85
231
+ },
232
+ {
233
+ "completion_length": 178.54586884469697,
234
+ "epoch": 15.0,
235
+ "grad_norm": 0.5907576680183411,
236
+ "kl": 0.4736032196969697,
237
+ "learning_rate": 1.342020143325669e-05,
238
+ "loss": 0.0157,
239
+ "reward": 0.606158793424115,
240
+ "reward_std": 0.13893395368800018,
241
+ "rewards/accuracy_reward_word": 0.6061588015520212,
242
+ "rewards/format_reward": 0.0,
243
+ "step": 90
244
+ },
245
+ {
246
+ "completion_length": 171.66697311401367,
247
+ "epoch": 15.975609756097562,
248
+ "grad_norm": 53.438209533691406,
249
+ "kl": 13.038330078125,
250
+ "learning_rate": 1.2588190451025209e-05,
251
+ "loss": 0.5218,
252
+ "reward": 0.5597633935511113,
253
+ "reward_std": 0.10934587656520307,
254
+ "rewards/accuracy_reward_word": 0.5597633916884661,
255
+ "rewards/format_reward": 0.0,
256
+ "step": 95
257
+ },
258
+ {
259
+ "completion_length": 176.56074940074575,
260
+ "epoch": 16.78048780487805,
261
+ "grad_norm": 0.5583186745643616,
262
+ "kl": 0.4050662878787879,
263
+ "learning_rate": 1.1736481776669307e-05,
264
+ "loss": 0.0134,
265
+ "reward": 0.5518235546169858,
266
+ "reward_std": 0.10686868465872425,
267
+ "rewards/accuracy_reward_word": 0.55182356003559,
268
+ "rewards/format_reward": 0.0,
269
+ "step": 100
270
+ },
271
+ {
272
+ "epoch": 16.78048780487805,
273
+ "eval_completion_length": 190.2759769984654,
274
+ "eval_kl": 0.32310267857142855,
275
+ "eval_loss": 0.014612293802201748,
276
+ "eval_reward": 0.5988541117736271,
277
+ "eval_reward_std": 0.07925501785108022,
278
+ "eval_rewards/accuracy_reward_word": 0.5988540819713047,
279
+ "eval_rewards/format_reward": 0.0,
280
+ "eval_runtime": 96.8362,
281
+ "eval_samples_per_second": 2.065,
282
+ "eval_steps_per_second": 0.01,
283
+ "step": 100
284
+ },
285
+ {
286
+ "completion_length": 218.19211023504084,
287
+ "epoch": 17.585365853658537,
288
+ "grad_norm": 54.766273498535156,
289
+ "kl": 8.253255208333334,
290
+ "learning_rate": 1.0871557427476585e-05,
291
+ "loss": 0.2714,
292
+ "reward": 0.5985936960487654,
293
+ "reward_std": 0.14751893265003507,
294
+ "rewards/accuracy_reward_word": 0.5985937005642689,
295
+ "rewards/format_reward": 0.0,
296
+ "step": 105
297
+ },
298
+ {
299
+ "completion_length": 254.85891122529,
300
+ "epoch": 18.390243902439025,
301
+ "grad_norm": 3.6129367351531982,
302
+ "kl": 5.241092566287879,
303
+ "learning_rate": 1e-05,
304
+ "loss": 0.1729,
305
+ "reward": 0.629414488420342,
306
+ "reward_std": 0.18181673561533293,
307
+ "rewards/accuracy_reward_word": 0.6294144843563889,
308
+ "rewards/format_reward": 0.0,
309
+ "step": 110
310
+ },
311
+ {
312
+ "completion_length": 263.85566780783915,
313
+ "epoch": 19.195121951219512,
314
+ "grad_norm": 1.826474905014038,
315
+ "kl": 0.8953006628787878,
316
+ "learning_rate": 9.128442572523418e-06,
317
+ "loss": 0.0296,
318
+ "reward": 0.6560273572350993,
319
+ "reward_std": 0.2060928950932893,
320
+ "rewards/accuracy_reward_word": 0.6560273635568041,
321
+ "rewards/format_reward": 0.0,
322
+ "step": 115
323
+ },
324
+ {
325
+ "completion_length": 203.282883384011,
326
+ "epoch": 20.0,
327
+ "grad_norm": 0.5897179841995239,
328
+ "kl": 1.3379794034090908,
329
+ "learning_rate": 8.263518223330698e-06,
330
+ "loss": 0.0442,
331
+ "reward": 0.6160052671576991,
332
+ "reward_std": 0.1589522831367724,
333
+ "rewards/accuracy_reward_word": 0.6160052716732025,
334
+ "rewards/format_reward": 0.0,
335
+ "step": 120
336
+ },
337
+ {
338
+ "completion_length": 161.04855613708497,
339
+ "epoch": 20.975609756097562,
340
+ "grad_norm": 0.2893044650554657,
341
+ "kl": 0.331640625,
342
+ "learning_rate": 7.411809548974792e-06,
343
+ "loss": 0.0133,
344
+ "reward": 0.5946122907102108,
345
+ "reward_std": 0.12153792111203074,
346
+ "rewards/accuracy_reward_word": 0.5946122877299785,
347
+ "rewards/format_reward": 0.0,
348
+ "step": 125
349
+ },
350
+ {
351
+ "completion_length": 154.29532727328214,
352
+ "epoch": 21.78048780487805,
353
+ "grad_norm": 0.17429320514202118,
354
+ "kl": 0.28111683238636365,
355
+ "learning_rate": 6.579798566743314e-06,
356
+ "loss": 0.0093,
357
+ "reward": 0.577382534290805,
358
+ "reward_std": 0.11978449254776492,
359
+ "rewards/accuracy_reward_word": 0.5773825306784023,
360
+ "rewards/format_reward": 0.0,
361
+ "step": 130
362
+ },
363
+ {
364
+ "completion_length": 159.07292406486744,
365
+ "epoch": 22.585365853658537,
366
+ "grad_norm": 0.32492542266845703,
367
+ "kl": 0.2540838068181818,
368
+ "learning_rate": 5.773817382593008e-06,
369
+ "loss": 0.0084,
370
+ "reward": 0.5901522681568608,
371
+ "reward_std": 0.11932384177590862,
372
+ "rewards/accuracy_reward_word": 0.5901522708661628,
373
+ "rewards/format_reward": 0.0,
374
+ "step": 135
375
+ },
376
+ {
377
+ "completion_length": 175.563588922674,
378
+ "epoch": 23.390243902439025,
379
+ "grad_norm": 1.4657223224639893,
380
+ "kl": 0.2944040009469697,
381
+ "learning_rate": 5.000000000000003e-06,
382
+ "loss": 0.0098,
383
+ "reward": 0.5877998123566309,
384
+ "reward_std": 0.12781856005842035,
385
+ "rewards/accuracy_reward_word": 0.5877998132597316,
386
+ "rewards/format_reward": 0.0,
387
+ "step": 140
388
+ },
389
+ {
390
+ "completion_length": 196.80899186567828,
391
+ "epoch": 24.195121951219512,
392
+ "grad_norm": 0.38781723380088806,
393
+ "kl": 0.3389707623106061,
394
+ "learning_rate": 4.264235636489542e-06,
395
+ "loss": 0.0112,
396
+ "reward": 0.6222013411196795,
397
+ "reward_std": 0.1575910769628756,
398
+ "rewards/accuracy_reward_word": 0.6222013402165789,
399
+ "rewards/format_reward": 0.0,
400
+ "step": 145
401
+ },
402
+ {
403
+ "completion_length": 212.34213649865353,
404
+ "epoch": 25.0,
405
+ "grad_norm": 0.2279985398054123,
406
+ "kl": 0.3564157196969697,
407
+ "learning_rate": 3.5721239031346067e-06,
408
+ "loss": 0.0118,
409
+ "reward": 0.6341576734275529,
410
+ "reward_std": 0.16485171076474767,
411
+ "rewards/accuracy_reward_word": 0.6341576770399556,
412
+ "rewards/format_reward": 0.0,
413
+ "step": 150
414
+ },
415
+ {
416
+ "completion_length": 198.5949857711792,
417
+ "epoch": 25.975609756097562,
418
+ "grad_norm": 0.14645038545131683,
419
+ "kl": 0.32952880859375,
420
+ "learning_rate": 2.9289321881345257e-06,
421
+ "loss": 0.0132,
422
+ "reward": 0.6285704858601093,
423
+ "reward_std": 0.16049452810548245,
424
+ "rewards/accuracy_reward_word": 0.6285704836249352,
425
+ "rewards/format_reward": 0.0,
426
+ "step": 155
427
+ },
428
+ {
429
+ "completion_length": 189.9249274513938,
430
+ "epoch": 26.78048780487805,
431
+ "grad_norm": 0.13024762272834778,
432
+ "kl": 0.3255800189393939,
433
+ "learning_rate": 2.339555568810221e-06,
434
+ "loss": 0.0108,
435
+ "reward": 0.6150937617728205,
436
+ "reward_std": 0.14573924322471474,
437
+ "rewards/accuracy_reward_word": 0.6150937717069279,
438
+ "rewards/format_reward": 0.0,
439
+ "step": 160
440
+ },
441
+ {
442
+ "completion_length": 174.02354685465494,
443
+ "epoch": 27.585365853658537,
444
+ "grad_norm": 0.13969573378562927,
445
+ "kl": 0.3170572916666667,
446
+ "learning_rate": 1.808479557110081e-06,
447
+ "loss": 0.0105,
448
+ "reward": 0.6073169595364368,
449
+ "reward_std": 0.12986301354160815,
450
+ "rewards/accuracy_reward_word": 0.607316970373645,
451
+ "rewards/format_reward": 0.0,
452
+ "step": 165
453
+ },
454
+ {
455
+ "completion_length": 181.3133195819277,
456
+ "epoch": 28.390243902439025,
457
+ "grad_norm": 0.21579568088054657,
458
+ "kl": 0.3319720643939394,
459
+ "learning_rate": 1.339745962155613e-06,
460
+ "loss": 0.011,
461
+ "reward": 0.6149095554243434,
462
+ "reward_std": 0.13368092341856522,
463
+ "rewards/accuracy_reward_word": 0.6149095631006992,
464
+ "rewards/format_reward": 0.0,
465
+ "step": 170
466
+ },
467
+ {
468
+ "completion_length": 179.86851408987334,
469
+ "epoch": 29.195121951219512,
470
+ "grad_norm": 0.1424599587917328,
471
+ "kl": 0.32353811553030304,
472
+ "learning_rate": 9.369221296335007e-07,
473
+ "loss": 0.0107,
474
+ "reward": 0.6067391426274271,
475
+ "reward_std": 0.14061830226670613,
476
+ "rewards/accuracy_reward_word": 0.6067391435305277,
477
+ "rewards/format_reward": 0.0,
478
+ "step": 175
479
+ },
480
+ {
481
+ "completion_length": 177.73133919455788,
482
+ "epoch": 30.0,
483
+ "grad_norm": 0.12455170601606369,
484
+ "kl": 0.32202888257575757,
485
+ "learning_rate": 6.030737921409169e-07,
486
+ "loss": 0.0107,
487
+ "reward": 0.6030513856447104,
488
+ "reward_std": 0.13355868055739187,
489
+ "rewards/accuracy_reward_word": 0.6030513928695158,
490
+ "rewards/format_reward": 0.0,
491
+ "step": 180
492
+ },
493
+ {
494
+ "completion_length": 178.40078887939453,
495
+ "epoch": 30.975609756097562,
496
+ "grad_norm": 0.14259420335292816,
497
+ "kl": 0.315283203125,
498
+ "learning_rate": 3.4074173710931804e-07,
499
+ "loss": 0.0127,
500
+ "reward": 0.6099405620247126,
501
+ "reward_std": 0.13809194271452724,
502
+ "rewards/accuracy_reward_word": 0.6099405620247126,
503
+ "rewards/format_reward": 0.0,
504
+ "step": 185
505
+ },
506
+ {
507
+ "completion_length": 174.6302841648911,
508
+ "epoch": 31.78048780487805,
509
+ "grad_norm": 0.1332584172487259,
510
+ "kl": 0.3048354640151515,
511
+ "learning_rate": 1.519224698779198e-07,
512
+ "loss": 0.0101,
513
+ "reward": 0.5993030166084116,
514
+ "reward_std": 0.1328924118795178,
515
+ "rewards/accuracy_reward_word": 0.5993030138991096,
516
+ "rewards/format_reward": 0.0,
517
+ "step": 190
518
+ },
519
+ {
520
+ "completion_length": 184.39110634543678,
521
+ "epoch": 32.58536585365854,
522
+ "grad_norm": 0.13871651887893677,
523
+ "kl": 0.31865530303030304,
524
+ "learning_rate": 3.805301908254455e-08,
525
+ "loss": 0.0106,
526
+ "reward": 0.6105609360066327,
527
+ "reward_std": 0.14098199039246095,
528
+ "rewards/accuracy_reward_word": 0.610560937812834,
529
+ "rewards/format_reward": 0.0,
530
+ "step": 195
531
+ },
532
+ {
533
+ "completion_length": 184.56710722952178,
534
+ "epoch": 33.390243902439025,
535
+ "grad_norm": 0.2092735767364502,
536
+ "kl": 0.3228574810606061,
537
+ "learning_rate": 0.0,
538
+ "loss": 0.0107,
539
+ "reward": 0.6148976295283346,
540
+ "reward_std": 0.1421807540975737,
541
+ "rewards/accuracy_reward_word": 0.614897631334536,
542
+ "rewards/format_reward": 0.0,
543
+ "step": 200
544
+ },
545
+ {
546
+ "epoch": 33.390243902439025,
547
+ "eval_completion_length": 183.40406145368303,
548
+ "eval_kl": 0.3189174107142857,
549
+ "eval_loss": 0.014444979839026928,
550
+ "eval_reward": 0.6251552956444877,
551
+ "eval_reward_std": 0.1085951988186155,
552
+ "eval_rewards/accuracy_reward_word": 0.6251552700996399,
553
+ "eval_rewards/format_reward": 0.0,
554
+ "eval_runtime": 95.3621,
555
+ "eval_samples_per_second": 2.097,
556
+ "eval_steps_per_second": 0.01,
557
+ "step": 200
558
+ },
559
+ {
560
+ "epoch": 33.390243902439025,
561
+ "step": 200,
562
+ "total_flos": 0.0,
563
+ "train_loss": 0.036479350634126606,
564
+ "train_runtime": 34004.8654,
565
+ "train_samples_per_second": 1.529,
566
+ "train_steps_per_second": 0.006
567
+ }
568
+ ],
569
+ "logging_steps": 5,
570
+ "max_steps": 200,
571
+ "num_input_tokens_seen": 0,
572
+ "num_train_epochs": 40,
573
+ "save_steps": 500,
574
+ "stateful_callbacks": {
575
+ "TrainerControl": {
576
+ "args": {
577
+ "should_epoch_stop": false,
578
+ "should_evaluate": false,
579
+ "should_log": false,
580
+ "should_save": false,
581
+ "should_training_stop": false
582
+ },
583
+ "attributes": {}
584
+ }
585
+ },
586
+ "total_flos": 0.0,
587
+ "train_batch_size": 16,
588
+ "trial_name": null,
589
+ "trial_params": null
590
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:515f14bdee751293bb315b8e548550983d747382833c2d5a27a5780db6fe0b7e
3
+ size 7544