shahidul034 commited on
Commit
034cb04
·
verified ·
1 Parent(s): e00ff48

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. code/RL_model/models/RL_model_only_subclaim_test/global_step_60/actor/fsdp_config.json +4 -0
  2. code/RL_model/models/RL_model_only_subclaim_test/global_step_60/actor/huggingface/generation_config.json +13 -0
  3. code/RL_model/models/RL_model_only_subclaim_test/global_step_60/actor/huggingface/tokenizer_config.json +239 -0
  4. code/RL_model/models/RL_model_only_subclaim_test/latest_checkpointed_iteration.txt +1 -0
  5. code/RL_model/models/RL_model_subclaim_classifier_v1/global_step_45/actor/fsdp_config.json +4 -0
  6. code/RL_model/models/RL_model_subclaim_classifier_v1/global_step_45/actor/huggingface/added_tokens.json +28 -0
  7. code/RL_model/models/RL_model_subclaim_classifier_v1/global_step_45/actor/huggingface/chat_template.jinja +61 -0
  8. code/RL_model/models/RL_model_subclaim_classifier_v1/global_step_45/actor/huggingface/config.json +68 -0
  9. code/RL_model/models/RL_model_subclaim_classifier_v1/global_step_45/actor/huggingface/generation_config.json +13 -0
  10. code/RL_model/models/RL_model_subclaim_classifier_v1/global_step_45/actor/huggingface/merges.txt +0 -0
  11. code/RL_model/models/RL_model_subclaim_classifier_v1/global_step_45/actor/huggingface/special_tokens_map.json +31 -0
  12. code/RL_model/models/RL_model_subclaim_classifier_v1/global_step_45/actor/huggingface/tokenizer_config.json +239 -0
  13. code/RL_model/models/RL_model_subclaim_classifier_v1/global_step_45/actor/huggingface/vocab.json +0 -0
  14. code/RL_model/models/RL_model_subclaim_classifier_v1/latest_checkpointed_iteration.txt +1 -0
  15. code/RL_model/verl/Search-R1/misc/docs/experiment_log.md +47 -0
  16. code/RL_model/verl/Search-R1/misc/docs/multinode.md +134 -0
  17. code/RL_model/verl/Search-R1/misc/docs/retriever.md +128 -0
  18. code/RL_model/verl/Search-R1/misc/example/case.txt +43 -0
  19. code/RL_model/verl/Search-R1/misc/example/corpus.jsonl +10 -0
  20. code/RL_model/verl/Search-R1/misc/example/multinode/train_grpo_multinode_32b.sh +77 -0
  21. code/RL_model/verl/Search-R1/misc/example/multinode/train_grpo_multinode_72b.sh +75 -0
  22. code/RL_model/verl/Search-R1/misc/example/multinode/train_ppo_multinode_32b.sh +84 -0
  23. code/RL_model/verl/Search-R1/misc/example/retriever/retrieval_launch_ann.sh +12 -0
  24. code/RL_model/verl/Search-R1/misc/example/retriever/retrieval_launch_bm25.sh +10 -0
  25. code/RL_model/verl/Search-R1/misc/example/retriever/retrieval_launch_google.sh +8 -0
  26. code/RL_model/verl/Search-R1/misc/example/retriever/retrieval_launch_hierarchical.sh +17 -0
  27. code/RL_model/verl/Search-R1/misc/example/retriever/retrieval_launch_serpapi.sh +7 -0
  28. code/RL_model/verl/Search-R1/misc/public/status.png +0 -0
  29. code/RL_model/verl/Search-R1/misc/public/worker.png +0 -0
  30. code/RL_model/verl/Search-R1/misc/scripts/data_process/nq.py +100 -0
  31. code/RL_model/verl/Search-R1/misc/scripts/data_process/nq_rag.py +141 -0
  32. code/RL_model/verl/Search-R1/misc/scripts/data_process/nq_search.py +101 -0
  33. code/RL_model/verl/Search-R1/misc/scripts/data_process/qa_search_test_merge.py +115 -0
  34. code/RL_model/verl/Search-R1/misc/scripts/data_process/qa_search_train_merge.py +105 -0
  35. code/RL_model/verl/Search-R1/misc/scripts/download.py +25 -0
  36. code/RL_model/verl/Search-R1/misc/scripts/download.sh +6 -0
  37. code/RL_model/verl/Search-R1/misc/scripts/nq_hotpotqa/README.md +42 -0
  38. code/RL_model/verl/Search-R1/misc/scripts/nq_hotpotqa/data_process.sh +10 -0
  39. code/RL_model/verl/Search-R1/misc/scripts/nq_hotpotqa/evaluate.sh +65 -0
  40. code/RL_model/verl/Search-R1/misc/scripts/nq_hotpotqa/v0.1/train_grpo.sh +84 -0
  41. code/RL_model/verl/Search-R1/misc/scripts/nq_hotpotqa/v0.1/train_ppo.sh +92 -0
  42. code/RL_model/verl/Search-R1/misc/scripts/nq_hotpotqa/v0.2/train_grpo.sh +79 -0
  43. code/RL_model/verl/Search-R1/misc/scripts/nq_hotpotqa/v0.2/train_ppo.sh +88 -0
  44. code/RL_model/verl/Search-R1/misc/scripts/nq_hotpotqa/v0.3/train_grpo_format.sh +87 -0
  45. code/RL_model/verl/Search-R1/misc/scripts/nq_hotpotqa/v0.3/train_ppo_format.sh +94 -0
  46. code/RL_model/verl/Search-R1/misc/scripts/upload.py +12 -0
  47. code/RL_model/verl/Search-R1/misc/scripts/upload.sh +6 -0
  48. code/RL_model/verl/Search-R1/verl/models/README.md +35 -0
  49. code/RL_model/verl/Search-R1/verl/models/__init__.py +13 -0
  50. code/RL_model/verl/Search-R1/verl/models/llama/__init__.py +13 -0
code/RL_model/models/RL_model_only_subclaim_test/global_step_60/actor/fsdp_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "FSDP_version": 1,
3
+ "world_size": 2
4
+ }
code/RL_model/models/RL_model_only_subclaim_test/global_step_60/actor/huggingface/generation_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 151645,
6
+ 151643
7
+ ],
8
+ "pad_token_id": 151643,
9
+ "temperature": 0.7,
10
+ "top_k": 20,
11
+ "top_p": 0.8,
12
+ "transformers_version": "4.56.1"
13
+ }
code/RL_model/models/RL_model_only_subclaim_test/global_step_60/actor/huggingface/tokenizer_config.json ADDED
@@ -0,0 +1,239 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ },
181
+ "151665": {
182
+ "content": "<tool_response>",
183
+ "lstrip": false,
184
+ "normalized": false,
185
+ "rstrip": false,
186
+ "single_word": false,
187
+ "special": false
188
+ },
189
+ "151666": {
190
+ "content": "</tool_response>",
191
+ "lstrip": false,
192
+ "normalized": false,
193
+ "rstrip": false,
194
+ "single_word": false,
195
+ "special": false
196
+ },
197
+ "151667": {
198
+ "content": "<think>",
199
+ "lstrip": false,
200
+ "normalized": false,
201
+ "rstrip": false,
202
+ "single_word": false,
203
+ "special": false
204
+ },
205
+ "151668": {
206
+ "content": "</think>",
207
+ "lstrip": false,
208
+ "normalized": false,
209
+ "rstrip": false,
210
+ "single_word": false,
211
+ "special": false
212
+ }
213
+ },
214
+ "additional_special_tokens": [
215
+ "<|im_start|>",
216
+ "<|im_end|>",
217
+ "<|object_ref_start|>",
218
+ "<|object_ref_end|>",
219
+ "<|box_start|>",
220
+ "<|box_end|>",
221
+ "<|quad_start|>",
222
+ "<|quad_end|>",
223
+ "<|vision_start|>",
224
+ "<|vision_end|>",
225
+ "<|vision_pad|>",
226
+ "<|image_pad|>",
227
+ "<|video_pad|>"
228
+ ],
229
+ "bos_token": null,
230
+ "clean_up_tokenization_spaces": false,
231
+ "eos_token": "<|im_end|>",
232
+ "errors": "replace",
233
+ "extra_special_tokens": {},
234
+ "model_max_length": 1010000,
235
+ "pad_token": "<|endoftext|>",
236
+ "split_special_tokens": false,
237
+ "tokenizer_class": "Qwen2Tokenizer",
238
+ "unk_token": null
239
+ }
code/RL_model/models/RL_model_only_subclaim_test/latest_checkpointed_iteration.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ 60
code/RL_model/models/RL_model_subclaim_classifier_v1/global_step_45/actor/fsdp_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "FSDP_version": 1,
3
+ "world_size": 2
4
+ }
code/RL_model/models/RL_model_subclaim_classifier_v1/global_step_45/actor/huggingface/added_tokens.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</think>": 151668,
3
+ "</tool_call>": 151658,
4
+ "</tool_response>": 151666,
5
+ "<think>": 151667,
6
+ "<tool_call>": 151657,
7
+ "<tool_response>": 151665,
8
+ "<|box_end|>": 151649,
9
+ "<|box_start|>": 151648,
10
+ "<|endoftext|>": 151643,
11
+ "<|file_sep|>": 151664,
12
+ "<|fim_middle|>": 151660,
13
+ "<|fim_pad|>": 151662,
14
+ "<|fim_prefix|>": 151659,
15
+ "<|fim_suffix|>": 151661,
16
+ "<|im_end|>": 151645,
17
+ "<|im_start|>": 151644,
18
+ "<|image_pad|>": 151655,
19
+ "<|object_ref_end|>": 151647,
20
+ "<|object_ref_start|>": 151646,
21
+ "<|quad_end|>": 151651,
22
+ "<|quad_start|>": 151650,
23
+ "<|repo_name|>": 151663,
24
+ "<|video_pad|>": 151656,
25
+ "<|vision_end|>": 151653,
26
+ "<|vision_pad|>": 151654,
27
+ "<|vision_start|>": 151652
28
+ }
code/RL_model/models/RL_model_subclaim_classifier_v1/global_step_45/actor/huggingface/chat_template.jinja ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0].role == 'system' %}
4
+ {{- messages[0].content + '\n\n' }}
5
+ {%- endif %}
6
+ {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
7
+ {%- for tool in tools %}
8
+ {{- "\n" }}
9
+ {{- tool | tojson }}
10
+ {%- endfor %}
11
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
12
+ {%- else %}
13
+ {%- if messages[0].role == 'system' %}
14
+ {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
15
+ {%- endif %}
16
+ {%- endif %}
17
+ {%- for message in messages %}
18
+ {%- if message.content is string %}
19
+ {%- set content = message.content %}
20
+ {%- else %}
21
+ {%- set content = '' %}
22
+ {%- endif %}
23
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
24
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
25
+ {%- elif message.role == "assistant" %}
26
+ {{- '<|im_start|>' + message.role + '\n' + content }}
27
+ {%- if message.tool_calls %}
28
+ {%- for tool_call in message.tool_calls %}
29
+ {%- if (loop.first and content) or (not loop.first) %}
30
+ {{- '\n' }}
31
+ {%- endif %}
32
+ {%- if tool_call.function %}
33
+ {%- set tool_call = tool_call.function %}
34
+ {%- endif %}
35
+ {{- '<tool_call>\n{"name": "' }}
36
+ {{- tool_call.name }}
37
+ {{- '", "arguments": ' }}
38
+ {%- if tool_call.arguments is string %}
39
+ {{- tool_call.arguments }}
40
+ {%- else %}
41
+ {{- tool_call.arguments | tojson }}
42
+ {%- endif %}
43
+ {{- '}\n</tool_call>' }}
44
+ {%- endfor %}
45
+ {%- endif %}
46
+ {{- '<|im_end|>\n' }}
47
+ {%- elif message.role == "tool" %}
48
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
49
+ {{- '<|im_start|>user' }}
50
+ {%- endif %}
51
+ {{- '\n<tool_response>\n' }}
52
+ {{- content }}
53
+ {{- '\n</tool_response>' }}
54
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
55
+ {{- '<|im_end|>\n' }}
56
+ {%- endif %}
57
+ {%- endif %}
58
+ {%- endfor %}
59
+ {%- if add_generation_prompt %}
60
+ {{- '<|im_start|>assistant\n' }}
61
+ {%- endif %}
code/RL_model/models/RL_model_subclaim_classifier_v1/global_step_45/actor/huggingface/config.json ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "dtype": "float32",
8
+ "eos_token_id": 151645,
9
+ "head_dim": 128,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 2560,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 9728,
14
+ "layer_types": [
15
+ "full_attention",
16
+ "full_attention",
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention"
51
+ ],
52
+ "max_position_embeddings": 262144,
53
+ "max_window_layers": 36,
54
+ "model_type": "qwen3",
55
+ "num_attention_heads": 32,
56
+ "num_hidden_layers": 36,
57
+ "num_key_value_heads": 8,
58
+ "pad_token_id": 151643,
59
+ "rms_norm_eps": 1e-06,
60
+ "rope_scaling": null,
61
+ "rope_theta": 5000000,
62
+ "sliding_window": null,
63
+ "tie_word_embeddings": true,
64
+ "transformers_version": "4.56.1",
65
+ "use_cache": true,
66
+ "use_sliding_window": false,
67
+ "vocab_size": 151936
68
+ }
code/RL_model/models/RL_model_subclaim_classifier_v1/global_step_45/actor/huggingface/generation_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 151645,
6
+ 151643
7
+ ],
8
+ "pad_token_id": 151643,
9
+ "temperature": 0.7,
10
+ "top_k": 20,
11
+ "top_p": 0.8,
12
+ "transformers_version": "4.56.1"
13
+ }
code/RL_model/models/RL_model_subclaim_classifier_v1/global_step_45/actor/huggingface/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
code/RL_model/models/RL_model_subclaim_classifier_v1/global_step_45/actor/huggingface/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
code/RL_model/models/RL_model_subclaim_classifier_v1/global_step_45/actor/huggingface/tokenizer_config.json ADDED
@@ -0,0 +1,239 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ },
181
+ "151665": {
182
+ "content": "<tool_response>",
183
+ "lstrip": false,
184
+ "normalized": false,
185
+ "rstrip": false,
186
+ "single_word": false,
187
+ "special": false
188
+ },
189
+ "151666": {
190
+ "content": "</tool_response>",
191
+ "lstrip": false,
192
+ "normalized": false,
193
+ "rstrip": false,
194
+ "single_word": false,
195
+ "special": false
196
+ },
197
+ "151667": {
198
+ "content": "<think>",
199
+ "lstrip": false,
200
+ "normalized": false,
201
+ "rstrip": false,
202
+ "single_word": false,
203
+ "special": false
204
+ },
205
+ "151668": {
206
+ "content": "</think>",
207
+ "lstrip": false,
208
+ "normalized": false,
209
+ "rstrip": false,
210
+ "single_word": false,
211
+ "special": false
212
+ }
213
+ },
214
+ "additional_special_tokens": [
215
+ "<|im_start|>",
216
+ "<|im_end|>",
217
+ "<|object_ref_start|>",
218
+ "<|object_ref_end|>",
219
+ "<|box_start|>",
220
+ "<|box_end|>",
221
+ "<|quad_start|>",
222
+ "<|quad_end|>",
223
+ "<|vision_start|>",
224
+ "<|vision_end|>",
225
+ "<|vision_pad|>",
226
+ "<|image_pad|>",
227
+ "<|video_pad|>"
228
+ ],
229
+ "bos_token": null,
230
+ "clean_up_tokenization_spaces": false,
231
+ "eos_token": "<|im_end|>",
232
+ "errors": "replace",
233
+ "extra_special_tokens": {},
234
+ "model_max_length": 1010000,
235
+ "pad_token": "<|endoftext|>",
236
+ "split_special_tokens": false,
237
+ "tokenizer_class": "Qwen2Tokenizer",
238
+ "unk_token": null
239
+ }
code/RL_model/models/RL_model_subclaim_classifier_v1/global_step_45/actor/huggingface/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
code/RL_model/models/RL_model_subclaim_classifier_v1/latest_checkpointed_iteration.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ 45
code/RL_model/verl/Search-R1/misc/docs/experiment_log.md ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ## Experiment log
3
+
4
+ ### Preliminary results
5
+
6
+ Resources: [wandb](https://wandb.ai/peterjin/Search-R1-open)
7
+
8
+
9
+ The preliminary experiment is conducted only on natural question (NQ) dataset (+ PPO) with a small number of training steps.
10
+
11
+
12
+ ### v0.1
13
+
14
+ Resources: [wandb](https://wandb.ai/peterjin/Search-R1-nq_hotpotqa_train), [docs](https://github.com/PeterGriffinJin/Search-R1/tree/main/scripts/nq_hotpotqa), [scripts](https://github.com/PeterGriffinJin/Search-R1/tree/main/scripts/nq_hotpotqa/v0.1)
15
+
16
+
17
+ We extend the experiments from NQ to seven datasets with both PPO and GRPO methods. The studies are still on a small number of training steps with a big learning rate warm up ratio.
18
+
19
+
20
+ ### v0.2
21
+
22
+ Resources: [wandb](https://wandb.ai/peterjin/Search-R1-v0.2), [docs](https://github.com/PeterGriffinJin/Search-R1/tree/main/scripts/nq_hotpotqa), [scripts](https://github.com/PeterGriffinJin/Search-R1/tree/main/scripts/nq_hotpotqa/v0.2), [paper](https://arxiv.org/abs/2503.09516)
23
+
24
+
25
+ We fix several bugs including [retrieved token masking](https://github.com/PeterGriffinJin/Search-R1/pull/21) and [GRPO sample indexing](https://github.com/PeterGriffinJin/Search-R1/commit/9ec2fa9892fbf0315d0c67b4dc08ae8f6cf5f378).
26
+ The former can largely improve the stablity of RL training.
27
+ Then we adjust the training scripts, increasing the number of training steps and decreasing the learning rate warm up ratio, to obtain a better performance, and conduct experiments on different scale of LLMs (3B, 7B, 14B).
28
+
29
+
30
+ ### v0.3
31
+
32
+ Resources: [wandb](https://wandb.ai/peterjin/Search-R1-v0.3), [docs](https://github.com/PeterGriffinJin/Search-R1/tree/main/scripts/nq_hotpotqa), [scripts](https://github.com/PeterGriffinJin/Search-R1/tree/main/scripts/nq_hotpotqa/v0.3), [paper](https://arxiv.org/abs/2505.15117)
33
+
34
+ We conduct studies on (1) reward design; (2) LLM backbone; and (3) search engine.
35
+
36
+ - Reward design
37
+ - Format reward
38
+ - Intermediate retrieval reward
39
+ - LLM backbone
40
+ - LLM type (e.g., general LLM or reasoning LLM)
41
+ - LLM scale (3B/7B/14B/32B)
42
+ - Search engine
43
+ - RL training dynamics
44
+ - generalization during inference
45
+ - Data scaling
46
+
47
+ Details can be found in the [paper](https://arxiv.org/abs/2505.15117).
code/RL_model/verl/Search-R1/misc/docs/multinode.md ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ## Multinode Training
3
+
4
+ Our codebase supports multi-node training for large-scale language models. The implementation is mainly based on [Ray](https://github.com/ray-project/ray).
5
+
6
+ There are two types of nodes when doing Ray multi-node training: (1) head node and (2) worker nodes.
7
+ There is only one head node where you will start the ray cluster and submit the job.
8
+ The other nodes are worker nodes, where you only need to start and register to the ray cluster.
9
+
10
+ ### Step 1: Set up multinode ray cluster (from [link](https://verl.readthedocs.io/en/latest/start/multinode.html#set-up-multinode-ray-cluster))
11
+
12
+ a. Start **head** node with ```ray start --head --dashboard-host=0.0.0.0```, there’re 2 address you should care about:
13
+
14
+ - GCS address: ```ray start --address=<address>```, where **worker** node should connect to.
15
+
16
+ - Dashboard address: ```<address>:8265```, where you should submit job to the cluster.
17
+
18
+ ![head](../public/head.png)
19
+
20
+ b. Start **worker node** and register it to the ray cluster with ```ray start --address=<address>``` you get above.
21
+
22
+ ![worker](../public/worker.png)
23
+
24
+ c. Check the cluster status with ```ray status```.
25
+
26
+ For example, if you have two nodes (each with 8 GPUs) in the cluster, you should see something like this:
27
+
28
+ ![status](../public/status.png)
29
+
30
+
31
+ ### Step 2: Launch the retrieval server on every node.
32
+
33
+ We would recommend launch the **same** retrieval server on every nodes (including both head and worker nodes) for the stable RL training. Detailed information on how to launch different retrievers can be found as follows: [doc](https://github.com/PeterGriffinJin/Search-R1/blob/main/docs/retriever.md) and [scripts](https://github.com/PeterGriffinJin/Search-R1/tree/main/example/retriever).
34
+
35
+ For example, if you want to launch the local dense retriever with flat indexing, run the following command on **every** nodes:
36
+
37
+ ```
38
+ bash retrieval_launch.sh
39
+ ```
40
+
41
+
42
+ ### Step 3: Start the job
43
+
44
+ After the retrievers are launched, you can start the training job. You only need to start the job on the ***head*** node.
45
+
46
+ An example script is shown as below. Change ```RAY_DASHBOARD_ADDRESS``` and ```N_NODES``` to your dashboard address found in step 1 and the number of nodes respectively.
47
+
48
+ More script examples can be found [here](https://github.com/PeterGriffinJin/Search-R1/tree/main/example/multinode).
49
+
50
+
51
+ ```bash
52
+ export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
53
+ export DATA_DIR='data/nq_search'
54
+
55
+ WAND_PROJECT="Search-R1-release"
56
+ RAY_DASHBOARD_ADDRESS="<address>:8265"
57
+ N_NODES=2
58
+
59
+ export BASE_MODEL='Qwen/Qwen2.5-7B'
60
+ export EXPERIMENT_NAME=${train_data}-${test_data}-search-r1-ppo-qwen2.5-7b-em-multinode-$N_NODES
61
+
62
+ # set -x
63
+ export VLLM_ATTENTION_BACKEND=XFORMERS
64
+
65
+ ulimit -n 65535
66
+
67
+ ray job submit --address=$RAY_DASHBOARD_ADDRESS \
68
+ --runtime-env=verl/trainer/runtime_env.yaml \
69
+ --no-wait \
70
+ -- \
71
+ python3 -m verl.trainer.main_ppo \
72
+ data.train_files=$DATA_DIR/train.parquet \
73
+ data.val_files=$DATA_DIR/test.parquet \
74
+ data.train_data_num=null \
75
+ data.val_data_num=null \
76
+ data.train_batch_size=512 \
77
+ data.val_batch_size=256 \
78
+ data.max_prompt_length=4096 \
79
+ data.max_response_length=500 \
80
+ data.max_start_length=2048 \
81
+ data.max_obs_length=500 \
82
+ data.shuffle_train_dataloader=True \
83
+ algorithm.adv_estimator=gae \
84
+ actor_rollout_ref.model.path=$BASE_MODEL \
85
+ actor_rollout_ref.actor.optim.lr=1e-6 \
86
+ actor_rollout_ref.model.enable_gradient_checkpointing=true \
87
+ actor_rollout_ref.model.use_remove_padding=True \
88
+ actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.285 \
89
+ actor_rollout_ref.actor.ppo_mini_batch_size=256 \
90
+ actor_rollout_ref.actor.ppo_micro_batch_size=64 \
91
+ actor_rollout_ref.actor.fsdp_config.param_offload=False \
92
+ actor_rollout_ref.actor.fsdp_config.grad_offload=False \
93
+ actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
94
+ actor_rollout_ref.rollout.log_prob_micro_batch_size=128 \
95
+ actor_rollout_ref.rollout.tensor_model_parallel_size=1 \
96
+ actor_rollout_ref.rollout.name=vllm \
97
+ actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \
98
+ actor_rollout_ref.ref.log_prob_micro_batch_size=128 \
99
+ actor_rollout_ref.ref.fsdp_config.param_offload=False \
100
+ actor_rollout_ref.rollout.n_agent=1 \
101
+ actor_rollout_ref.rollout.temperature=1 \
102
+ actor_rollout_ref.rollout.top_p=1.0 \
103
+ actor_rollout_ref.actor.state_masking=true \
104
+ critic.optim.lr=1e-5 \
105
+ critic.model.use_remove_padding=True \
106
+ critic.optim.lr_warmup_steps_ratio=0.015 \
107
+ critic.model.path=$BASE_MODEL \
108
+ critic.model.enable_gradient_checkpointing=true \
109
+ critic.ppo_micro_batch_size=16 \
110
+ critic.model.fsdp_config.param_offload=False \
111
+ critic.model.fsdp_config.grad_offload=False \
112
+ critic.model.fsdp_config.optimizer_offload=False \
113
+ algorithm.kl_ctrl.kl_coef=0.001 \
114
+ algorithm.no_think_rl=false \
115
+ trainer.critic_warmup=0 \
116
+ trainer.logger=['wandb'] \
117
+ +trainer.val_only=false \
118
+ +trainer.val_before_train=false \
119
+ trainer.default_hdfs_dir=null \
120
+ trainer.n_gpus_per_node=8 \
121
+ trainer.nnodes=$N_NODES \
122
+ trainer.save_freq=100 \
123
+ trainer.test_freq=100 \
124
+ trainer.project_name=$WAND_PROJECT \
125
+ trainer.experiment_name=$EXPERIMENT_NAME \
126
+ trainer.total_epochs=15 \
127
+ trainer.total_training_steps=1005 \
128
+ trainer.default_hdfs_dir=null \
129
+ trainer.default_local_dir=verl_checkpoints/$EXPERIMENT_NAME \
130
+ max_turns=4 \
131
+ retriever.url="http://127.0.0.1:8000/retrieve" \
132
+ retriever.topk=3 \
133
+ 2>&1 | tee $EXPERIMENT_NAME.log
134
+ ```
code/RL_model/verl/Search-R1/misc/docs/retriever.md ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ## Search Engine
3
+
4
+ In this document, we provide examples of how to launch different retrievers, including local sparse retriever (e.g., BM25), local dense retriever (e.g., e5) and online search engine.
5
+ For local retrievers, we use [wiki-18](https://huggingface.co/datasets/PeterJinGo/wiki-18-corpus) corpus as an example and the corpus indexing can be found at [bm25](https://huggingface.co/datasets/PeterJinGo/wiki-18-bm25-index), [e5-flat](https://huggingface.co/datasets/PeterJinGo/wiki-18-e5-index), [e5-HNSW64](https://huggingface.co/datasets/PeterJinGo/wiki-18-e5-index-HNSW64).
6
+
7
+ ### How to choose the retriever?
8
+
9
+ - If you have a private or domain-specific corpus, choose **local retriever**.
10
+
11
+ - If there are no high-quality embedding-based retrievers (dense retrievers) in your domain, choose **sparse local retriever** (e.g., BM25).
12
+
13
+ - Otherwise choose **dense local retriever**.
14
+
15
+ - If you do not have sufficient GPUs to conduct exact dense embedding matching, choose **ANN indexing** on CPUs.
16
+
17
+ - If you have sufficient GPUs, choose **flat indexing** on GPUs.
18
+
19
+
20
+ - If you want to train a general LLM search agent and have enough funding, choose **online search engine** (e.g., [SerpAPI](https://serpapi.com/)).
21
+
22
+
23
+ - If you have a domain specific online search engine (e.g., PubMed search), you can refer to [link](https://github.com/PeterGriffinJin/Search-R1/blob/main/search_r1/search/serp_search_server.py) to integrate it to Search-R1 by yourself.
24
+
25
+ Search engine launching scripts can be found at [link](https://github.com/PeterGriffinJin/Search-R1/tree/main/example/retriever).
26
+
27
+ ### Local Sparse Retriever
28
+
29
+ Sparse retriever (e.g., bm25) is a traditional method. The retrieval process is very efficient and no GPUs are needed. However, it may not be as accurate as dense retrievers in some specific domains.
30
+
31
+ (1) Download the indexing.
32
+ ```bash
33
+ save_path=/your/path/to/save
34
+ huggingface-cli download PeterJinGo/wiki-18-bm25-index --repo-type dataset --local-dir $save_path
35
+ ```
36
+
37
+ (2) Launch a local BM25 retriever server.
38
+ ```bash
39
+ conda activate retriever
40
+
41
+ index_file=$save_path/bm25
42
+ corpus_file=$save_path/wiki-18.jsonl
43
+ retriever_name=bm25
44
+
45
+ python search_r1/search/retrieval_server.py --index_path $index_file --corpus_path $corpus_file --topk 3 --retriever_name $retriever_name
46
+ ```
47
+
48
+
49
+ ### Local Dense Retriever
50
+
51
+ You can also adopt some off-the-shelf dense retrievers, e.g., e5. These models are much stronger than sparse retrievers in some specific domains.
52
+ If you have sufficient GPU, we would recommend the flat indexing variant below, otherwise you can adopt the ANN variant.
53
+
54
+ #### Flat indexing
55
+
56
+ Flat indexing conducts exact embedding match, which is slow but very accurate. To make it efficient enough to support online RL, we would recommend enabling **GPU** usage by ```--faiss_gpu```.
57
+
58
+ (1) Download the indexing and corpus.
59
+ ```bash
60
+ save_path=/the/path/to/save
61
+ python scripts/download.py --save_path $save_path
62
+ cat $save_path/part_* > $save_path/e5_Flat.index
63
+ gzip -d $save_path/wiki-18.jsonl.gz
64
+ ```
65
+
66
+ (2) Launch a local flat e5 retriever server.
67
+
68
+ ```bash
69
+ conda activate retriever
70
+
71
+ index_file=$save_path/e5_Flat.index
72
+ corpus_file=$save_path/wiki-18.jsonl
73
+ retriever_name=e5
74
+ retriever_path=intfloat/e5-base-v2
75
+
76
+ python search_r1/search/retrieval_server.py --index_path $index_file --corpus_path $corpus_file --topk 3 --retriever_name $retriever_name --retriever_model $retriever_path --faiss_gpu
77
+
78
+ ```
79
+
80
+
81
+ #### ANN indexing (HNSW64)
82
+
83
+ To improve search efficiency with only **CPU**, you can adopt approximate nearest neighbor (ANN) indexing, e.g., with HNSW64.
84
+ It is very efficient, but may not be as accurate as flat indexing, especially when the number of retrieved passages is small.
85
+
86
+ (1) Download the indexing.
87
+ ```bash
88
+ save_path=/the/path/to/save
89
+ huggingface-cli download PeterJinGo/wiki-18-e5-index-HNSW64 --repo-type dataset --local-dir $save_path
90
+ cat $save_path/part_* > $save_path/e5_HNSW64.index
91
+ ```
92
+
93
+
94
+ (2) Launch a local ANN dense retriever server.
95
+ ```bash
96
+ conda activate retriever
97
+
98
+ index_file=$save_path/e5_HNSW64.index
99
+ corpus_file=$save_path/wiki-18.jsonl
100
+ retriever_name=e5
101
+ retriever_path=intfloat/e5-base-v2
102
+
103
+ python search_r1/search/retrieval_server.py --index_path $index_file --corpus_path $corpus_file --topk 3 --retriever_name $retriever_name --retriever_model $retriever_path
104
+ ```
105
+
106
+
107
+ ### Online Search Engine
108
+
109
+ We support both [Google Search API](https://developers.google.com/custom-search/v1/overview) and [SerpAPI](https://serpapi.com/). We would recommend [SerpAPI](https://serpapi.com/) since it integrates multiple online search engine APIs (including Google, Bing, Baidu, etc) and does not have a monthly quota limitation ([Google Search API](https://developers.google.com/custom-search/v1/overview) has a hard 10k monthly quota, which is not sufficient to fulfill online LLM RL training).
110
+
111
+ #### SerpAPI online search server
112
+
113
+ ```bash
114
+ search_url=https://serpapi.com/search
115
+ serp_api_key="" # put your serp api key here (https://serpapi.com/)
116
+
117
+ python search_r1/search/serp_search_server.py --search_url $search_url --topk 3 --serp_api_key $serp_api_key
118
+ ```
119
+
120
+ #### Google online search server
121
+
122
+ ```bash
123
+ api_key="" # put your google custom API key here (https://developers.google.com/custom-search/v1/overview)
124
+ cse_id="" # put your google cse API key here (https://developers.google.com/custom-search/v1/overview)
125
+
126
+ python search_r1/search/google_search_server.py --api_key $api_key --topk 5 --cse_id $cse_id --snippet_only
127
+ ```
128
+
code/RL_model/verl/Search-R1/misc/example/case.txt ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <|im_start|>system
2
+ You are a helpful assistant.<|im_end|>
3
+ <|im_start|>user
4
+ Answer the given question. You must conduct reasoning inside <think> and </think> first every time you get new information. After reasoning, if you find you lack some knowledge, you can call a search engine by <search> query </search> and it will return the top searched results between <information> and </information>. You can search as many times as your want. If you find no further external knowledge needed, you can directly provide the answer inside <answer> and </answer>, without detailed illustrations. For example, <answer> Beijing </answer>. Question: paul walkers cars in fast and furious list?
5
+ <|im_end|>
6
+ <|im_start|>assistant
7
+ <think>I need to search for Paul Walker's cars in Fast and Furious movies.</think>
8
+ <search> Paul Walker's cars in Fast and Furious </search>
9
+
10
+ <information>Doc 1(Title: "Paul Walker") also had a large car collection of about 30 cars, a portion of which he co-owned along with Roger Rodas. The collection included (but is not limited to): Roger Rodas became Walker's financial adviser in 2007 and helped to establish Reach Out Worldwide. Rodas, a pro-am racer was the CEO of Always Evolving, a Valencia high-end vehicle performance shop owned by Walker. Walker was also a close friend of his ""2 Fast 2 Furious"" co-star Tyrese Gibson. Vin Diesel considered Walker to be like a brother, both on and off screen, and affectionately called him ""Pablo"". Walker's mother referred to
11
+ Doc 2(Title: "Paul Walker") Paul Walker Paul William Walker IV (September 12, 1973 – November 30, 2013) was an American actor best known for his role as Brian O'Conner in ""The Fast and the Furious"" franchise. Walker first gained prominence in 1999 with roles in the teen films ""She's All That"" and ""Varsity Blues"". In 2001, he gained international fame for his role in the street racing action film ""The Fast and the Furious"" (2001), a role he reprised in five of the next six installments, but died in 2013 in the middle of filming ""Furious 7"" (2015). Walker began his career guest-starring on
12
+ Doc 3(Title: "Paul Walker") of Porsche in a separate lawsuit filed by Roger Rodas' widow, Kristine. The ruling had no bearing on two other cases against Porsche which have been filed by Walker's father, who is also the executor of the actor's estate, and his daughter. Walker's father and daughter both reached an agreement with Porsche. Paul Walker Paul William Walker IV (September 12, 1973 – November 30, 2013) was an American actor best known for his role as Brian O'Conner in ""The Fast and the Furious"" franchise. Walker first gained prominence in 1999 with roles in the teen films ""She's All That"" and</information>
13
+
14
+ From the information provided, it's clear that Paul Walker was a part of the "Fast and Furious" series, but the specific list of cars is not mentioned. Since I lack this particular detail, I will call a search engine to get the specific list of cars Paul Walker drove in the "Fast and Furious" movies.
15
+
16
+ <search> list of cars paul walker drove in Fast and Furious </search>
17
+
18
+ <information>Doc 1(Title: "Paul Walker") also had a large car collection of about 30 cars, a portion of which he co-owned along with Roger Rodas. The collection included (but is not limited to): Roger Rodas became Walker's financial adviser in 2007 and helped to establish Reach Out Worldwide. Rodas, a pro-am racer was the CEO of Always Evolving, a Valencia high-end vehicle performance shop owned by Walker. Walker was also a close friend of his ""2 Fast 2 Furious"" co-star Tyrese Gibson. Vin Diesel considered Walker to be like a brother, both on and off screen, and affectionately called him ""Pablo"". Walker's mother referred to
19
+ Doc 2(Title: "Paul Walker") Paul Walker Paul William Walker IV (September 12, 1973 – November 30, 2013) was an American actor best known for his role as Brian O'Conner in ""The Fast and the Furious"" franchise. Walker first gained prominence in 1999 with roles in the teen films ""She's All That"" and ""Varsity Blues"". In 2001, he gained international fame for his role in the street racing action film ""The Fast and the Furious"" (2001), a role he reprised in five of the next six installments, but died in 2013 in the middle of filming ""Furious 7"" (2015). Walker began his career guest-starring on
20
+ Doc 3(Title: "Paul Walker") of Porsche in a separate lawsuit filed by Roger Rodas' widow, Kristine. The ruling had no bearing on two other cases against Porsche which have been filed by Walker's father, who is also the executor of the actor's estate, and his daughter. Walker's father and daughter both reached an agreement with Porsche. Paul Walker Paul William Walker IV (September 12, 1973 – November 30, 2013) was an American actor best known for his role as Brian O'Conner in ""The Fast and the Furious"" franchise. Walker first gained prominence in 1999 with roles in the teen films ""She's All That"" and</information>
21
+
22
+ Based on the updated information, it seems the focus was on his career, financials, and family. However, I am still missing the specific list of cars he drove in the "Fast and Furious" movies. Since it appears that the information might not be contained within the accessed documents, and I have no further search queries to make, I will provide an answer based on the details I have.
23
+
24
+ <answer> Charger </answer>
25
+
26
+
27
+
28
+ <|im_start|>system
29
+ You are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>
30
+ <|im_start|>user
31
+ Answer the given question. You must conduct reasoning inside <think> and </think> first every time you get new information. After reasoning, if you find you lack some knowledge, you can call a search engine by <search> query </search> and it will return the top searched results between <information> and </information>. You can search as many times as your want. If you find no further external knowledge needed, you can directly provide the answer inside <answer> and </answer>, without detailed illustrations. For example, <answer> Beijing </answer>. Question: the heart muscle is stimulated to contract by electrical impulses which are generated where?
32
+ <|im_end|>
33
+ <|im_start|>assistant
34
+ <think> I remember that the heart's electrical impulses originate from a specific part of the heart, but I need to confirm which part.</think>
35
+ <search> origin of heart's electrical impulses </search>
36
+
37
+ <information>Doc 1(Title: "Cardiac pacemaker") Cardiac pacemaker The contraction of cardiac muscle (heart muscle) in all animals is initiated by electrical impulses known as action potentials. The rate at which these impulses fire controls the rate of cardiac contraction, that is, the heart rate. The cells that create these rhythmic impulses, setting the pace for blood pumping, are called pacemaker cells, and they directly control the heart rate. They make up the cardiac pacemaker, that is, the natural pacemaker of the heart. In most humans, the concentration of pacemaker cells in the sinoatrial (SA) node is the natural pacemaker, and the resultant rhythm is a
38
+ Doc 2(Title: "Electrical conduction system of the heart") Electrical conduction system of the heart The electrical conduction system of the heart transmits signals generated usually by the sinoatrial node to cause contraction of the heart muscle. The pacemaking signal generated in the sinoatrial node travels through the right atrium to the atrioventricular node, along the Bundle of His and through bundle branches to cause contraction of the heart muscle. This signal stimulates contraction first of the right and left atrium, and then the right and left ventricles. This process allows blood to be pumped throughout the body. The conduction system consists of specialised heart muscle cells, and is
39
+ Doc 3(Title: "Sinoatrial node") Sinoatrial node The sinoatrial node (SA node), also known as sinus node, is a group of cells located in the wall of the right atrium of the heart. These cells have the ability to spontaneously produce an electrical impulse (action potential; see below for more details), that travels through the heart via the electrical conduction system (see figure 1) causing it to contract. In a healthy heart, the SA node continuously produces action potential, setting the rhythm of the heart and so is known as the heart's natural pacemaker. The rate of action potential production (and therefore the heart rate)</information>
40
+
41
+ From the information provided, it is clear that the heart's electrical impulses are generated in the sinoatrial (SA) node, which is often referred to as the heart's natural pacemaker. </think>
42
+
43
+ <answer> sinoatrial (SA) node </answer>
code/RL_model/verl/Search-R1/misc/example/corpus.jsonl ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {"id": "0", "contents": "\"Evan Morris\"\nEvan Morris Evan L. Morris (January 26, 1977 \u2013 July 9, 2015) was a lobbyist for Genentech and its parent corporation Roche in Washington, D.C. He began his career in Washington as an intern in the Clinton White House at age 18. He began his lobbying work at Patton Boggs before moving on to Roche in 2005. His early work at Roche involved government policy on Medicare and Medicaid, flu preparedness, and hepatitis C. His work at Genentech is being investigated by the Wall Street Journal, which states that \u201cshaping up to be one of the biggest U.S. investigations into"}
2
+ {"id": "1", "contents": "\"Horatio Hale\"\nconsisted of an Algonkin vocabulary, which he gathered from a band of Indians who had camped on the college grounds. Three years later, when the United States Exploring Expedition to little-known portions of the globe was organised under Charles Wilkes, Hale was recommended, while yet an undergraduate, for the post of ethnologist and philologist, and obtained the appointment. From 1838 to 1842, he was employed in the work of the expedition, visiting South America, Australasia, Polynesia, and North-western America, then known as Oregon. From this point he returned overland. The Hale Passages of Puget Sound were named in recognition of"}
3
+ {"id": "2", "contents": "\"Dibba Al-Hisn\"\nis believed to be the site where the Portuguese during the Habsburg Dynasty built a fort and a wall around the city. In August 1648, the Arabs besieged Muscat, Oman, and on October 31, 1648, a treaty was signed between the two opponents. The terms required the Portuguese to build the fortress of Kuriyat, Dibba Al-Hisn and Matrah (Oman). Note: There have been land disputes between Dibba Al-Hisn and Dibba Al-Baya, but these were resolved in the 1990s. </div> A branch of the Community College of the University of Sharjah is available in Dibba Al-Hisn. Alkhalidya Secondary School Dibba Al-Hisn"}
4
+ {"id": "3", "contents": "\"Ao Oni (film)\"\nthe door as a big blue hand grabs his head. The group hears Takeshi's scream and come back to check on him. As they reach the room, they see the door open and Takeshi, bleeding, is thrown out. He asks for help before being dragged back and the door closes. As the group enters the room, they finds Takeshi's dismembered body. Anna and Hiroshi pressure Mika about Takuro's whereabouts and about Naoki, but Mika refuses to tell them and runs away, chased by Hiroshi. Anna confronts Shun who describes that this event is the same as the game he created."}
5
+ {"id": "4", "contents": "\"Pavia Cathedral\"\non March 17, 1989. The cathedral was begun in 1488, under architect Cristoforo Rocchi, who was soon replaced by Giovanni Antonio Amadeo and Gian Giacomo Dolcebuono. The original project, with a nave and two aisles flanked by semicircular niches and a large central dome, was influenced by Bramante, some details of it later appearing in St. Peter's Basilica in Rome. Leonardo da Vinci is also known to have contributed to the project. In 1521, the altar area was completed by Gianpietrino Rizzi, a pupil of Da Vinci. By the 17th century, the presbytery had been completed but only in the"}
6
+ {"id": "5", "contents": "\"Pavia Cathedral\"\nfollowing century was the tambour built, while the dome itself and the facade had to wait for the 19th century. The dome was designed by Carlo Maciachini and completed in 1885, but partially collapsed the same year. In 1930, construction continued with the two arms of the transept, for which the original plan was followed, although using reinforced concrete (in order to save the remains of the medieval Santa Maria del Popolo). The arms are still missing part of the internal marble decoration. The church is on the Greek Cross plan: it therefore has the same length and width at"}
7
+ {"id": "6", "contents": "\"Iowa Highway 17\"\nWesley. Although Iowa 17 has only been designated since 1969, most of the route has been a part of the primary highway system since the system's inception in 1919. The route was designated Primary Road No. 60 and connected Des Moines and Goldfield via Webster City. By 1930, the only paved section of what was by then called Iowa 60 was the southernmost from Des Moines to south of Madrid. In two years, paving extended to US 30 east of Boone. 1932 saw many changes to Iowa 60. The route was extended to the north where it ended at US"}
8
+ {"id": "7", "contents": "\"E.T. the Extra-Terrestrial (video game)\"\nfinancial failure for Atari. By 2004, the cartridges were still very common and offered at very low prices. While reviews of the movie were highly positive, the game was negatively received by critics, with common complaints focused on the plot, gameplay, and visuals. \"\"New York\"\" magazine's Nicholas Pileggi described it as a loser when compared to other games Atari could have released like \"\"Donkey Kong\"\" and \"\"Frogger\"\". \"\"Video Games\"\" called the game \"\"really for kids (the littler ones)\"\". Kevin Bowen of GameSpy's Classic Gaming called the gameplay \"\"convoluted and inane\"\", also criticizing its story for departing from the serious tone"}
9
+ {"id": "8", "contents": "\"Ao Oni (film)\"\n(for Takeshi cannot see Shun), and why Anna said Mika will not call him, because he is already dead and cannot be seen by everyone, except Anna. He then flashbacks to the time Takuro bullied him in the riverbank. Takuro hit Shun in the head, killing him. It is then revealed that Takuro went to the empty house to hide Shun's body. Takuro then explained that Shun used Takuro's name in the game and got mad. Shun is terrified with the truth and finally disappears as Anna apologizes to him. Takuro plans on killing Anna since she knows too much,"}
10
+ {"id": "9", "contents": "\"4th Airborne Corps (Soviet Union)\"\n4th Airborne Corps (Soviet Union) The 4th Airborne Corps was an airborne corps of the Red Army in World War II. It fought in the Vyazma airborne operation, an unsuccessful landing during the Rzhev-Vyazma Offensive. The corps was formed in the spring of 1941 in the Western Special Military District from the personnel of the 214th Airborne Brigade. The corps was commanded by Aleksey Semenovich Zhadov. On 22 June 1941, the corps was stationed in the Western Front's second echelon in Pukhavichy in Minsk Region. On 26 June, the corps was ordered to conduct an air-assault and ground attack with"}
code/RL_model/verl/Search-R1/misc/example/multinode/train_grpo_multinode_32b.sh ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ data_name=nq_hotpotqa_train
2
+
3
+ export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
4
+ export DATA_DIR=data/${data_name} # first download the data from https://huggingface.co/datasets/PeterJinGo/nq_hotpotqa_train
5
+
6
+ WAND_PROJECT="Search-R1"
7
+ RAY_DASHBOARD_ADDRESS="http://xx.xx.xx.xx:8265" # your head node address
8
+ N_NODES=4
9
+
10
+ export BASE_MODEL='Qwen/Qwen2.5-32B'
11
+ export EXPERIMENT_NAME=${train_data}-${test_data}-search-r1-grpo-qwen2.5-32b-em-multinode-${N_NODES}
12
+
13
+ # set -x
14
+ export VLLM_ATTENTION_BACKEND=XFORMERS # vllm + qwen2-7b with flash_attn has some issues
15
+
16
+ # max_prompt_length = (config['training']['max_start_length'] + config['training']['max_response_length'] * (config['training']['max_turns'] - 1) + config['training']['max_obs_length'] * config['training']['max_turns'])
17
+
18
+ ulimit -n 65535
19
+
20
+ ray job submit --address=$RAY_DASHBOARD_ADDRESS \
21
+ --runtime-env=verl/trainer/runtime_env.yaml \
22
+ --no-wait \
23
+ -- \
24
+ python3 -m verl.trainer.main_ppo \
25
+ data.train_files=$DATA_DIR/train.parquet \
26
+ data.val_files=$DATA_DIR/test.parquet \
27
+ data.train_data_num=null \
28
+ data.val_data_num=null \
29
+ data.train_batch_size=512 \
30
+ data.val_batch_size=256 \
31
+ data.max_prompt_length=4096 \
32
+ data.max_response_length=500 \
33
+ data.max_start_length=2048 \
34
+ data.max_obs_length=500 \
35
+ data.shuffle_train_dataloader=True \
36
+ algorithm.adv_estimator=grpo \
37
+ actor_rollout_ref.model.path=$BASE_MODEL \
38
+ actor_rollout_ref.model.enable_gradient_checkpointing=True \
39
+ actor_rollout_ref.model.use_remove_padding=True \
40
+ actor_rollout_ref.actor.optim.lr=2e-7 \
41
+ actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.285 \
42
+ actor_rollout_ref.actor.use_kl_loss=True \
43
+ actor_rollout_ref.actor.ppo_mini_batch_size=256 \
44
+ actor_rollout_ref.actor.ppo_micro_batch_size=64 \
45
+ actor_rollout_ref.actor.fsdp_config.param_offload=false \
46
+ actor_rollout_ref.actor.fsdp_config.grad_offload=false \
47
+ actor_rollout_ref.actor.fsdp_config.optimizer_offload=false \
48
+ actor_rollout_ref.rollout.log_prob_micro_batch_size=128 \
49
+ actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
50
+ actor_rollout_ref.rollout.name=vllm \
51
+ actor_rollout_ref.rollout.gpu_memory_utilization=0.5 \
52
+ actor_rollout_ref.ref.log_prob_micro_batch_size=128 \
53
+ actor_rollout_ref.ref.fsdp_config.param_offload=false \
54
+ actor_rollout_ref.actor.kl_loss_coef=0.001 \
55
+ actor_rollout_ref.actor.kl_loss_type=low_var_kl \
56
+ algorithm.no_think_rl=false \
57
+ actor_rollout_ref.rollout.n_agent=5 \
58
+ actor_rollout_ref.rollout.temperature=1 \
59
+ actor_rollout_ref.actor.state_masking=True \
60
+ trainer.logger=['wandb'] \
61
+ +trainer.val_only=false \
62
+ +trainer.val_before_train=false \
63
+ trainer.default_hdfs_dir=null \
64
+ trainer.n_gpus_per_node=8 \
65
+ trainer.nnodes=$N_NODES \
66
+ trainer.save_freq=100 \
67
+ trainer.test_freq=100 \
68
+ trainer.project_name=$WAND_PROJECT \
69
+ trainer.experiment_name=$EXPERIMENT_NAME \
70
+ trainer.total_epochs=15 \
71
+ trainer.total_training_steps=1005 \
72
+ trainer.default_hdfs_dir=null \
73
+ trainer.default_local_dir=verl_checkpoints/$EXPERIMENT_NAME \
74
+ max_turns=4 \
75
+ retriever.url="http://127.0.0.1:8000/retrieve" \
76
+ retriever.topk=3 \
77
+ 2>&1 | tee $EXPERIMENT_NAME.log
code/RL_model/verl/Search-R1/misc/example/multinode/train_grpo_multinode_72b.sh ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ data_name=nq_hotpotqa_train
2
+
3
+ export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
4
+ export DATA_DIR=data/${data_name} # first download the data from https://huggingface.co/datasets/PeterJinGo/nq_hotpotqa_train
5
+
6
+ WAND_PROJECT="Search-R1"
7
+ RAY_DASHBOARD_ADDRESS="http://xx.xx.xx.xx:8265" # your head node address
8
+ N_NODES=4
9
+
10
+ export BASE_MODEL='Qwen/Qwen2.5-72B'
11
+ export EXPERIMENT_NAME=${train_data}-${test_data}-search-r1-grpo-qwen2.5-72b-em-multinode-${N_NODES}
12
+
13
+ # set -x
14
+ export VLLM_ATTENTION_BACKEND=XFORMERS # vllm + qwen2-7b with flash_attn has some issues
15
+
16
+ ulimit -n 65535
17
+
18
+ ray job submit --address=$RAY_DASHBOARD_ADDRESS \
19
+ --runtime-env=verl/trainer/runtime_env.yaml \
20
+ --no-wait \
21
+ -- \
22
+ python3 -m verl.trainer.main_ppo \
23
+ data.train_files=$DATA_DIR/train.parquet \
24
+ data.val_files=$DATA_DIR/test.parquet \
25
+ data.train_data_num=null \
26
+ data.val_data_num=null \
27
+ data.train_batch_size=512 \
28
+ data.val_batch_size=256 \
29
+ data.max_prompt_length=4096 \
30
+ data.max_response_length=500 \
31
+ data.max_start_length=2048 \
32
+ data.max_obs_length=500 \
33
+ data.shuffle_train_dataloader=True \
34
+ algorithm.adv_estimator=grpo \
35
+ actor_rollout_ref.model.path=$BASE_MODEL \
36
+ actor_rollout_ref.model.enable_gradient_checkpointing=True \
37
+ actor_rollout_ref.model.use_remove_padding=True \
38
+ actor_rollout_ref.actor.optim.lr=1e-7 \
39
+ actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.285 \
40
+ actor_rollout_ref.actor.use_kl_loss=True \
41
+ actor_rollout_ref.actor.ppo_mini_batch_size=256 \
42
+ actor_rollout_ref.actor.ppo_micro_batch_size=32 \
43
+ actor_rollout_ref.actor.fsdp_config.param_offload=True \
44
+ actor_rollout_ref.actor.fsdp_config.grad_offload=True \
45
+ actor_rollout_ref.actor.fsdp_config.optimizer_offload=True \
46
+ actor_rollout_ref.rollout.log_prob_micro_batch_size=32 \
47
+ actor_rollout_ref.rollout.tensor_model_parallel_size=4 \
48
+ actor_rollout_ref.rollout.name=vllm \
49
+ actor_rollout_ref.rollout.gpu_memory_utilization=0.5 \
50
+ actor_rollout_ref.ref.log_prob_micro_batch_size=32 \
51
+ actor_rollout_ref.ref.fsdp_config.param_offload=True \
52
+ actor_rollout_ref.actor.kl_loss_coef=0.001 \
53
+ actor_rollout_ref.actor.kl_loss_type=low_var_kl \
54
+ algorithm.no_think_rl=false \
55
+ actor_rollout_ref.rollout.n_agent=5 \
56
+ actor_rollout_ref.rollout.temperature=1 \
57
+ actor_rollout_ref.actor.state_masking=True \
58
+ trainer.logger=['wandb'] \
59
+ +trainer.val_only=false \
60
+ +trainer.val_before_train=false \
61
+ trainer.default_hdfs_dir=null \
62
+ trainer.n_gpus_per_node=8 \
63
+ trainer.nnodes=$N_NODES \
64
+ trainer.save_freq=100 \
65
+ trainer.test_freq=100 \
66
+ trainer.project_name=$WAND_PROJECT \
67
+ trainer.experiment_name=$EXPERIMENT_NAME \
68
+ trainer.total_epochs=15 \
69
+ trainer.total_training_steps=1005 \
70
+ trainer.default_hdfs_dir=null \
71
+ trainer.default_local_dir=verl_checkpoints/$EXPERIMENT_NAME \
72
+ max_turns=4 \
73
+ retriever.url="http://127.0.0.1:8000/retrieve" \
74
+ retriever.topk=3 \
75
+ 2>&1 | tee $EXPERIMENT_NAME.log
code/RL_model/verl/Search-R1/misc/example/multinode/train_ppo_multinode_32b.sh ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ data_name=nq_hotpotqa_train
2
+
3
+ export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
4
+ export DATA_DIR=data/${data_name} # first download the data from https://huggingface.co/datasets/PeterJinGo/nq_hotpotqa_train
5
+
6
+ WAND_PROJECT="Search-R1"
7
+ RAY_DASHBOARD_ADDRESS="http://xx.xx.xx.xx:8265" # your head node address
8
+ N_NODES=4
9
+
10
+ export BASE_MODEL='Qwen/Qwen2.5-32B'
11
+ export EXPERIMENT_NAME=${train_data}-${test_data}-search-r1-ppo-qwen2.5-32b-em-multinode-${N_NODES}
12
+
13
+ # set -x
14
+ export VLLM_ATTENTION_BACKEND=XFORMERS
15
+
16
+ ulimit -n 65535
17
+
18
+ ray job submit --address=$RAY_DASHBOARD_ADDRESS \
19
+ --runtime-env=verl/trainer/runtime_env.yaml \
20
+ --no-wait \
21
+ -- \
22
+ python3 -m verl.trainer.main_ppo \
23
+ data.train_files=$DATA_DIR/train.parquet \
24
+ data.val_files=$DATA_DIR/test.parquet \
25
+ data.train_data_num=null \
26
+ data.val_data_num=null \
27
+ data.train_batch_size=512 \
28
+ data.val_batch_size=256 \
29
+ data.max_prompt_length=4096 \
30
+ data.max_response_length=500 \
31
+ data.max_start_length=2048 \
32
+ data.max_obs_length=500 \
33
+ data.shuffle_train_dataloader=True \
34
+ algorithm.adv_estimator=gae \
35
+ actor_rollout_ref.model.path=$BASE_MODEL \
36
+ actor_rollout_ref.actor.optim.lr=2e-7 \
37
+ actor_rollout_ref.model.enable_gradient_checkpointing=true \
38
+ actor_rollout_ref.model.use_remove_padding=True \
39
+ actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.285 \
40
+ actor_rollout_ref.actor.ppo_mini_batch_size=256 \
41
+ actor_rollout_ref.actor.ppo_micro_batch_size=32 \
42
+ actor_rollout_ref.actor.fsdp_config.param_offload=False \
43
+ actor_rollout_ref.actor.fsdp_config.grad_offload=False \
44
+ actor_rollout_ref.actor.fsdp_config.optimizer_offload=True \
45
+ actor_rollout_ref.rollout.log_prob_micro_batch_size=32 \
46
+ actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
47
+ actor_rollout_ref.rollout.name=vllm \
48
+ actor_rollout_ref.rollout.gpu_memory_utilization=0.5 \
49
+ actor_rollout_ref.ref.log_prob_micro_batch_size=32 \
50
+ actor_rollout_ref.ref.fsdp_config.param_offload=False \
51
+ actor_rollout_ref.rollout.n_agent=1 \
52
+ actor_rollout_ref.rollout.temperature=1 \
53
+ actor_rollout_ref.rollout.top_p=1.0 \
54
+ actor_rollout_ref.actor.state_masking=true \
55
+ critic.optim.lr=1e-5 \
56
+ critic.model.use_remove_padding=True \
57
+ critic.optim.lr_warmup_steps_ratio=0.015 \
58
+ critic.model.path=$BASE_MODEL \
59
+ critic.model.enable_gradient_checkpointing=true \
60
+ critic.ppo_micro_batch_size=32 \
61
+ critic.model.fsdp_config.param_offload=False \
62
+ critic.model.fsdp_config.grad_offload=False \
63
+ critic.model.fsdp_config.optimizer_offload=True \
64
+ algorithm.kl_ctrl.kl_coef=0.001 \
65
+ algorithm.no_think_rl=false \
66
+ trainer.critic_warmup=0 \
67
+ trainer.logger=['wandb'] \
68
+ +trainer.val_only=false \
69
+ +trainer.val_before_train=true \
70
+ trainer.default_hdfs_dir=null \
71
+ trainer.n_gpus_per_node=8 \
72
+ trainer.nnodes=$N_NODES \
73
+ trainer.save_freq=100 \
74
+ trainer.test_freq=100 \
75
+ trainer.project_name=$WAND_PROJECT \
76
+ trainer.experiment_name=$EXPERIMENT_NAME \
77
+ trainer.total_epochs=15 \
78
+ trainer.total_training_steps=1005 \
79
+ trainer.default_hdfs_dir=null \
80
+ trainer.default_local_dir=verl_checkpoints/$EXPERIMENT_NAME \
81
+ max_turns=4 \
82
+ retriever.url="http://127.0.0.1:8000/retrieve" \
83
+ retriever.topk=3 \
84
+ 2>&1 | tee $EXPERIMENT_NAME.log
code/RL_model/verl/Search-R1/misc/example/retriever/retrieval_launch_ann.sh ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ file_path=/the/path/you/save/corpus
3
+ index_file=$file_path/e5_HNSW64.index
4
+ corpus_file=$file_path/wiki-18.jsonl
5
+ retriever_name=e5
6
+ retriever_path=intfloat/e5-base-v2
7
+
8
+ python search_r1/search/retrieval_server.py --index_path $index_file \
9
+ --corpus_path $corpus_file \
10
+ --topk 3 \
11
+ --retriever_name $retriever_name \
12
+ --retriever_model $retriever_path
code/RL_model/verl/Search-R1/misc/example/retriever/retrieval_launch_bm25.sh ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ file_path=/the/path/you/save/corpus
3
+ index_file=$file_path/bm25
4
+ corpus_file=$file_path/wiki-18.jsonl
5
+ retriever_name=bm25
6
+
7
+ python search_r1/search/retrieval_server.py --index_path $index_file \
8
+ --corpus_path $corpus_file \
9
+ --topk 3 \
10
+ --retriever_name $retriever_name
code/RL_model/verl/Search-R1/misc/example/retriever/retrieval_launch_google.sh ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+
2
+ api_key="" # put your google custom API key here (https://developers.google.com/custom-search/v1/overview)
3
+ cse_id="" # put your google cse API key here (https://developers.google.com/custom-search/v1/overview)
4
+
5
+ python search_r1/search/internal_google_server.py --api_key $api_key \
6
+ --topk 5 \
7
+ --cse_id $cse_id \
8
+ --snippet_only
code/RL_model/verl/Search-R1/misc/example/retriever/retrieval_launch_hierarchical.sh ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ file_path=/the/path/you/save/corpus
3
+ index_file=$file_path/e5_Flat.index
4
+ corpus_file=$file_path/wiki-18.jsonl
5
+ retriever_name=e5
6
+ retriever_path=intfloat/e5-base-v2
7
+ reranker_path=cross-encoder/ms-marco-MiniLM-L12-v2
8
+
9
+ python search_r1/search/retrieval_rerank_server.py --index_path $index_file \
10
+ --corpus_path $corpus_file \
11
+ --retrieval_topk 10 \
12
+ --retriever_name $retriever_name \
13
+ --retriever_model $retriever_path \
14
+ --faiss_gpu \
15
+ --reranking_topk 3 \
16
+ --reranker_model $reranker_path \
17
+ --reranker_batch_size 32
code/RL_model/verl/Search-R1/misc/example/retriever/retrieval_launch_serpapi.sh ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+
2
+ search_url=https://serpapi.com/search
3
+ serp_api_key="" # put your serp api key here (https://serpapi.com/)
4
+
5
+ python search_r1/search/online_search_server.py --search_url $search_url \
6
+ --topk 3 \
7
+ --serp_api_key $serp_api_key
code/RL_model/verl/Search-R1/misc/public/status.png ADDED
code/RL_model/verl/Search-R1/misc/public/worker.png ADDED
code/RL_model/verl/Search-R1/misc/scripts/data_process/nq.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2024 Bytedance Ltd. and/or its affiliates
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """
15
+ Preprocess the nq dataset to parquet format
16
+ """
17
+
18
+ import re
19
+ import os
20
+ import datasets
21
+
22
+ from verl.utils.hdfs_io import copy, makedirs
23
+ import argparse
24
+
25
+
26
+ def make_prefix(dp, template_type):
27
+ question = dp['question']
28
+
29
+ # NOTE: also need to change reward_score/countdown.py
30
+ if template_type == 'base':
31
+ """This works for any base model"""
32
+ prefix = f"""Answer the given question. \
33
+ You should first have a reasoning process in mind and then provides the answer. \
34
+ Show your reasoning in <think> </think> tags and return the final answer in <answer> </answer> tags, for example <answer> Beijing </answer>. \
35
+ Question: {question}\n"""
36
+ else:
37
+ raise NotImplementedError
38
+ return prefix
39
+
40
+
41
+ if __name__ == '__main__':
42
+ parser = argparse.ArgumentParser()
43
+ parser.add_argument('--local_dir', default='./data/nq')
44
+ parser.add_argument('--hdfs_dir', default=None)
45
+ parser.add_argument('--template_type', type=str, default='base')
46
+
47
+ args = parser.parse_args()
48
+
49
+ data_source = 'nq'
50
+
51
+ dataset = datasets.load_dataset('RUC-NLPIR/FlashRAG_datasets', 'nq')
52
+
53
+ train_dataset = dataset['train']
54
+ test_dataset = dataset['test']
55
+
56
+ # add a row to each data item that represents a unique id
57
+ def make_map_fn(split):
58
+
59
+ def process_fn(example, idx):
60
+ example['question'] = example['question'].strip()
61
+ if example['question'][-1] != '?':
62
+ example['question'] += '?'
63
+ question = make_prefix(example, template_type=args.template_type)
64
+ solution = {
65
+ "target": example['golden_answers'],
66
+ }
67
+
68
+ data = {
69
+ "data_source": data_source,
70
+ "prompt": [{
71
+ "role": "user",
72
+ "content": question,
73
+ }],
74
+ "ability": "fact-reasoning",
75
+ "reward_model": {
76
+ "style": "rule",
77
+ "ground_truth": solution
78
+ },
79
+ "extra_info": {
80
+ 'split': split,
81
+ 'index': idx,
82
+ }
83
+ }
84
+ return data
85
+
86
+ return process_fn
87
+
88
+ train_dataset = train_dataset.map(function=make_map_fn('train'), with_indices=True)
89
+ test_dataset = test_dataset.map(function=make_map_fn('test'), with_indices=True)
90
+
91
+ local_dir = args.local_dir
92
+ hdfs_dir = args.hdfs_dir
93
+
94
+ train_dataset.to_parquet(os.path.join(local_dir, 'train.parquet'))
95
+ test_dataset.to_parquet(os.path.join(local_dir, 'test.parquet'))
96
+
97
+ if hdfs_dir is not None:
98
+ makedirs(hdfs_dir)
99
+
100
+ copy(src=local_dir, dst=hdfs_dir)
code/RL_model/verl/Search-R1/misc/scripts/data_process/nq_rag.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2024 Bytedance Ltd. and/or its affiliates
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """
15
+ Preprocess the nq dataset to parquet format
16
+ """
17
+
18
+ import re
19
+ import os
20
+ import json
21
+ import datasets
22
+
23
+ from verl.utils.hdfs_io import copy, makedirs
24
+ import argparse
25
+
26
+
27
+ def make_prefix(dp, template_type):
28
+ question = dp['question']
29
+ context = dp['context']
30
+
31
+ # NOTE: also need to change reward_score/countdown.py
32
+ if template_type == 'base':
33
+ """This works for any base model"""
34
+ prefix = f"""Answer the given question with some potentially useful context. \
35
+ You should analyze the question carefully, evaluate the given context (which may or may not be useful), and then generate an accurate and well-reasoned response. \
36
+ You should first have a reasoning process in mind and then provides the answer. \
37
+ Show your reasoning in <think> </think> tags and return the final answer in <answer> </answer> tags, for example <answer> Beijing </answer>. \
38
+ Question: {question} Context: {context} \n"""
39
+ else:
40
+ raise NotImplementedError
41
+ return prefix
42
+
43
+
44
+ def format_reference(retrieval_result):
45
+ format_reference = ''
46
+ for idx, doc_item in enumerate(retrieval_result):
47
+ content = doc_item['contents']
48
+ title = content.split("\n")[0]
49
+ text = "\n".join(content.split("\n")[1:])
50
+ format_reference += f"Doc {idx+1}(Title: {title}) {text}\n"
51
+
52
+ return format_reference
53
+
54
+
55
+ if __name__ == '__main__':
56
+ parser = argparse.ArgumentParser()
57
+ parser.add_argument('--local_dir', default='./data/nq_rag')
58
+ parser.add_argument('--hdfs_dir', default=None)
59
+ parser.add_argument('--template_type', type=str, default='base')
60
+ parser.add_argument('--topk', type=int, default=3)
61
+ parser.add_argument('--corpus_path', type=str, default='/home/peterjin/mnt/data/retrieval-corpus/wiki-18.jsonl')
62
+ parser.add_argument('--train_retrieval_cache', type=str, default='/home/peterjin/rag_retrieval_cache/nq/e5_train_retrieval_cache_2048.json')
63
+ parser.add_argument('--test_retrieval_cache', type=str, default='/home/peterjin/rag_retrieval_cache/nq/e5_test_retrieval_cache_10000.json')
64
+
65
+ args = parser.parse_args()
66
+
67
+ data_source = 'nq'
68
+
69
+ dataset = datasets.load_dataset('RUC-NLPIR/FlashRAG_datasets', 'nq')
70
+
71
+ train_dataset = dataset['train']
72
+ test_dataset = dataset['test']
73
+
74
+ # read retrieval cache
75
+ print('reading retrieval cache...')
76
+ retrieval_cache = json.load(open(args.train_retrieval_cache))
77
+ # test_retrieval_cache = json.load(open(args.test_retrieval_cache))
78
+ retrieval_cache.update(json.load(open(args.test_retrieval_cache)))
79
+
80
+ # read corpus
81
+ print('reading corpus...')
82
+ corpus = {}
83
+ with open(args.corpus_path) as f:
84
+ readin = f.readlines()
85
+ for line in readin:
86
+ tmp = json.loads(line)
87
+ corpus[tmp['id']] = tmp
88
+
89
+ # add a column for the retrieval context
90
+ def add_context(example):
91
+ example['context'] = format_reference([corpus[docs["id"]] for docs in retrieval_cache[example['question']][:args.topk]])
92
+ return example
93
+
94
+ train_dataset = train_dataset.map(function=add_context)
95
+ test_dataset = test_dataset.map(function=add_context)
96
+
97
+ # add a row to each data item that represents a unique id
98
+ def make_map_fn(split):
99
+
100
+ def process_fn(example, idx):
101
+ example['question'] = example['question'].strip()
102
+ if example['question'][-1] != '?':
103
+ example['question'] += '?'
104
+ question = make_prefix(example, template_type=args.template_type)
105
+ solution = {
106
+ "target": example['golden_answers'],
107
+ }
108
+
109
+ data = {
110
+ "data_source": data_source,
111
+ "prompt": [{
112
+ "role": "user",
113
+ "content": question,
114
+ }],
115
+ "ability": "fact-reasoning",
116
+ "reward_model": {
117
+ "style": "rule",
118
+ "ground_truth": solution
119
+ },
120
+ "extra_info": {
121
+ 'split': split,
122
+ 'index': idx,
123
+ }
124
+ }
125
+ return data
126
+
127
+ return process_fn
128
+
129
+ train_dataset = train_dataset.map(function=make_map_fn('train'), with_indices=True)
130
+ test_dataset = test_dataset.map(function=make_map_fn('test'), with_indices=True)
131
+
132
+ local_dir = args.local_dir
133
+ hdfs_dir = args.hdfs_dir
134
+
135
+ train_dataset.to_parquet(os.path.join(local_dir, 'train.parquet'))
136
+ test_dataset.to_parquet(os.path.join(local_dir, 'test.parquet'))
137
+
138
+ if hdfs_dir is not None:
139
+ makedirs(hdfs_dir)
140
+
141
+ copy(src=local_dir, dst=hdfs_dir)
code/RL_model/verl/Search-R1/misc/scripts/data_process/nq_search.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2024 Bytedance Ltd. and/or its affiliates
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """
15
+ Preprocess the nq dataset to parquet format
16
+ """
17
+
18
+ import re
19
+ import os
20
+ import datasets
21
+
22
+ from verl.utils.hdfs_io import copy, makedirs
23
+ import argparse
24
+
25
+
26
+ def make_prefix(dp, template_type):
27
+ question = dp['question']
28
+
29
+ # NOTE: also need to change reward_score/countdown.py
30
+ if template_type == 'base':
31
+ """This works for any base model"""
32
+ prefix = f"""Answer the given question. \
33
+ You must conduct reasoning inside <think> and </think> first every time you get new information. \
34
+ After reasoning, if you find you lack some knowledge, you can call a search engine by <search> query </search> and it will return the top searched results between <information> and </information>. \
35
+ You can search as many times as your want. \
36
+ If you find no further external knowledge needed, you can directly provide the answer inside <answer> and </answer>, without detailed illustrations. For example, <answer> Beijing </answer>. Question: {question}\n"""
37
+ else:
38
+ raise NotImplementedError
39
+ return prefix
40
+
41
+
42
+ if __name__ == '__main__':
43
+ parser = argparse.ArgumentParser()
44
+ parser.add_argument('--local_dir', default='./data/nq_search')
45
+ parser.add_argument('--hdfs_dir', default=None)
46
+ parser.add_argument('--template_type', type=str, default='base')
47
+
48
+ args = parser.parse_args()
49
+
50
+ data_source = 'nq'
51
+
52
+ dataset = datasets.load_dataset('RUC-NLPIR/FlashRAG_datasets', 'nq')
53
+
54
+ train_dataset = dataset['train']
55
+ test_dataset = dataset['test']
56
+
57
+ # add a row to each data item that represents a unique id
58
+ def make_map_fn(split):
59
+
60
+ def process_fn(example, idx):
61
+ example['question'] = example['question'].strip()
62
+ if example['question'][-1] != '?':
63
+ example['question'] += '?'
64
+ question = make_prefix(example, template_type=args.template_type)
65
+ solution = {
66
+ "target": example['golden_answers'],
67
+ }
68
+
69
+ data = {
70
+ "data_source": data_source,
71
+ "prompt": [{
72
+ "role": "user",
73
+ "content": question,
74
+ }],
75
+ "ability": "fact-reasoning",
76
+ "reward_model": {
77
+ "style": "rule",
78
+ "ground_truth": solution
79
+ },
80
+ "extra_info": {
81
+ 'split': split,
82
+ 'index': idx,
83
+ }
84
+ }
85
+ return data
86
+
87
+ return process_fn
88
+
89
+ train_dataset = train_dataset.map(function=make_map_fn('train'), with_indices=True)
90
+ test_dataset = test_dataset.map(function=make_map_fn('test'), with_indices=True)
91
+
92
+ local_dir = args.local_dir
93
+ hdfs_dir = args.hdfs_dir
94
+
95
+ train_dataset.to_parquet(os.path.join(local_dir, 'train.parquet'))
96
+ test_dataset.to_parquet(os.path.join(local_dir, 'test.parquet'))
97
+
98
+ if hdfs_dir is not None:
99
+ makedirs(hdfs_dir)
100
+
101
+ copy(src=local_dir, dst=hdfs_dir)
code/RL_model/verl/Search-R1/misc/scripts/data_process/qa_search_test_merge.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2024 Bytedance Ltd. and/or its affiliates
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """
15
+ Preprocess the QA dataset to parquet format
16
+ """
17
+
18
+ import re
19
+ import os
20
+ import datasets
21
+
22
+ from verl.utils.hdfs_io import copy, makedirs
23
+ import argparse
24
+
25
+
26
+ def make_prefix(dp, template_type):
27
+ question = dp['question']
28
+
29
+ # NOTE: also need to change reward_score/countdown.py
30
+ if template_type == 'base':
31
+ """This works for any base model"""
32
+ prefix = f"""Answer the given question. \
33
+ You must conduct reasoning inside <think> and </think> first every time you get new information. \
34
+ After reasoning, if you find you lack some knowledge, you can call a search engine by <search> query </search> and it will return the top searched results between <information> and </information>. \
35
+ You can search as many times as your want. \
36
+ If you find no further external knowledge needed, you can directly provide the answer inside <answer> and </answer>, without detailed illustrations. For example, <answer> Beijing </answer>. Question: {question}\n"""
37
+ else:
38
+ raise NotImplementedError
39
+ return prefix
40
+
41
+
42
+ if __name__ == '__main__':
43
+ parser = argparse.ArgumentParser()
44
+ parser.add_argument('--local_dir', default='./data/nq_search')
45
+ parser.add_argument('--hdfs_dir', default=None)
46
+ parser.add_argument('--template_type', type=str, default='base')
47
+ parser.add_argument('--data_sources', default='nq')
48
+
49
+ args = parser.parse_args()
50
+
51
+ data_sources = args.data_sources.split(',')
52
+ all_dataset = []
53
+
54
+ for data_source in data_sources:
55
+
56
+ if data_source != 'strategyqa':
57
+ dataset = datasets.load_dataset('RUC-NLPIR/FlashRAG_datasets', data_source)
58
+ else:
59
+ dataset = datasets.load_dataset('json', data_files="/home/peterjin/mnt/data/strategyqa/test_correct.jsonl")
60
+
61
+ if 'test' in dataset:
62
+ print(f'Using the {data_source} test dataset...')
63
+ test_dataset = dataset['test']
64
+ elif 'dev' in dataset:
65
+ print(f'Using the {data_source} dev dataset...')
66
+ test_dataset = dataset['dev']
67
+ else:
68
+ print(f'Using the {data_source} train dataset...')
69
+ test_dataset = dataset['train']
70
+
71
+ # add a row to each data item that represents a unique id
72
+ def make_map_fn(split):
73
+
74
+ def process_fn(example, idx):
75
+ example['question'] = example['question'].strip()
76
+ if example['question'][-1] != '?':
77
+ example['question'] += '?'
78
+ question = make_prefix(example, template_type=args.template_type)
79
+ solution = {
80
+ "target": example['golden_answers'],
81
+ }
82
+
83
+ data = {
84
+ "data_source": data_source,
85
+ "prompt": [{
86
+ "role": "user",
87
+ "content": question,
88
+ }],
89
+ "ability": "fact-reasoning",
90
+ "reward_model": {
91
+ "style": "rule",
92
+ "ground_truth": solution
93
+ },
94
+ "extra_info": {
95
+ 'split': split,
96
+ 'index': idx,
97
+ }
98
+ }
99
+ return data
100
+
101
+ return process_fn
102
+
103
+ test_dataset = test_dataset.map(function=make_map_fn('test'), with_indices=True)
104
+ all_dataset.append(test_dataset)
105
+
106
+ local_dir = args.local_dir
107
+ hdfs_dir = args.hdfs_dir
108
+
109
+ all_test_dataset = datasets.concatenate_datasets(all_dataset)
110
+ all_test_dataset.to_parquet(os.path.join(local_dir, 'test.parquet'))
111
+
112
+ if hdfs_dir is not None:
113
+ makedirs(hdfs_dir)
114
+
115
+ copy(src=local_dir, dst=hdfs_dir)
code/RL_model/verl/Search-R1/misc/scripts/data_process/qa_search_train_merge.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2024 Bytedance Ltd. and/or its affiliates
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """
15
+ Preprocess the QA dataset to parquet format
16
+ """
17
+
18
+ import re
19
+ import os
20
+ import datasets
21
+
22
+ from verl.utils.hdfs_io import copy, makedirs
23
+ import argparse
24
+
25
+
26
+ def make_prefix(dp, template_type):
27
+ question = dp['question']
28
+
29
+ # NOTE: also need to change reward_score/countdown.py
30
+ if template_type == 'base':
31
+ """This works for any base model"""
32
+ prefix = f"""Answer the given question. \
33
+ You must conduct reasoning inside <think> and </think> first every time you get new information. \
34
+ After reasoning, if you find you lack some knowledge, you can call a search engine by <search> query </search> and it will return the top searched results between <information> and </information>. \
35
+ You can search as many times as your want. \
36
+ If you find no further external knowledge needed, you can directly provide the answer inside <answer> and </answer>, without detailed illustrations. For example, <answer> Beijing </answer>. Question: {question}\n"""
37
+ else:
38
+ raise NotImplementedError
39
+ return prefix
40
+
41
+
42
+ if __name__ == '__main__':
43
+ parser = argparse.ArgumentParser()
44
+ parser.add_argument('--local_dir', default='./data/nq_search')
45
+ parser.add_argument('--hdfs_dir', default=None)
46
+ parser.add_argument('--template_type', type=str, default='base')
47
+ parser.add_argument('--data_sources', default='nq')
48
+
49
+ args = parser.parse_args()
50
+
51
+ # data_source = 'nq'
52
+ data_sources = args.data_sources.split(',')
53
+ all_dataset = []
54
+
55
+ for data_source in data_sources:
56
+
57
+ dataset = datasets.load_dataset('RUC-NLPIR/FlashRAG_datasets', data_source)
58
+
59
+ train_dataset = dataset['train']
60
+
61
+ # add a row to each data item that represents a unique id
62
+ def make_map_fn(split):
63
+
64
+ def process_fn(example, idx):
65
+ example['question'] = example['question'].strip()
66
+ if example['question'][-1] != '?':
67
+ example['question'] += '?'
68
+ question = make_prefix(example, template_type=args.template_type)
69
+ solution = {
70
+ "target": example['golden_answers'],
71
+ }
72
+
73
+ data = {
74
+ "data_source": data_source,
75
+ "prompt": [{
76
+ "role": "user",
77
+ "content": question,
78
+ }],
79
+ "ability": "fact-reasoning",
80
+ "reward_model": {
81
+ "style": "rule",
82
+ "ground_truth": solution
83
+ },
84
+ "extra_info": {
85
+ 'split': split,
86
+ 'index': idx,
87
+ }
88
+ }
89
+ return data
90
+
91
+ return process_fn
92
+
93
+ train_dataset = train_dataset.map(function=make_map_fn('train'), with_indices=True)
94
+ all_dataset.append(train_dataset)
95
+
96
+ local_dir = args.local_dir
97
+ hdfs_dir = args.hdfs_dir
98
+
99
+ all_train_dataset = datasets.concatenate_datasets(all_dataset)
100
+ all_train_dataset.to_parquet(os.path.join(local_dir, 'train.parquet'))
101
+
102
+ if hdfs_dir is not None:
103
+ makedirs(hdfs_dir)
104
+
105
+ copy(src=local_dir, dst=hdfs_dir)
code/RL_model/verl/Search-R1/misc/scripts/download.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ from huggingface_hub import hf_hub_download
3
+
4
+ parser = argparse.ArgumentParser(description="Download files from a Hugging Face dataset repository.")
5
+ parser.add_argument("--repo_id", type=str, default="PeterJinGo/wiki-18-e5-index", help="Hugging Face repository ID")
6
+ parser.add_argument("--save_path", type=str, required=True, help="Local directory to save files")
7
+
8
+ args = parser.parse_args()
9
+
10
+ repo_id = "PeterJinGo/wiki-18-e5-index"
11
+ for file in ["part_aa", "part_ab"]:
12
+ hf_hub_download(
13
+ repo_id=repo_id,
14
+ filename=file, # e.g., "e5_Flat.index"
15
+ repo_type="dataset",
16
+ local_dir=args.save_path,
17
+ )
18
+
19
+ repo_id = "PeterJinGo/wiki-18-corpus"
20
+ hf_hub_download(
21
+ repo_id=repo_id,
22
+ filename="wiki-18.jsonl.gz",
23
+ repo_type="dataset",
24
+ local_dir=args.save_path,
25
+ )
code/RL_model/verl/Search-R1/misc/scripts/download.sh ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+
2
+ save_path=/home/peterjin/debug_cache
3
+
4
+ python download.py --save_path $save_path
5
+
6
+ cat $save_path/part_* > $save_path/e5_Flat.index
code/RL_model/verl/Search-R1/misc/scripts/nq_hotpotqa/README.md ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ## Reproduce the paper results
3
+
4
+ ### Download the dataset
5
+
6
+ ```bash
7
+ huggingface-cli download --repo-type dataset PeterJinGo/nq_hotpotqa_train --local-dir $WORK_DIR/data/nq_hotpotqa_train
8
+ ```
9
+
10
+ ### Launch the local search engine
11
+
12
+ (1) Download the indexing and corpus.
13
+ ```bash
14
+ save_path=/the/path/to/save
15
+ python scripts/download.py --save_path $save_path
16
+ cat $save_path/part_* > $save_path/e5_Flat.index
17
+ gzip -d $save_path/wiki-18.jsonl.gz
18
+ ```
19
+
20
+ (2) Launch a local retrieval server.
21
+ ```bash
22
+ conda activate retriever
23
+ bash retrieval_launch.sh
24
+ ```
25
+
26
+ ### Run PPO training
27
+ ```bash
28
+ bash train_ppo.sh
29
+ ```
30
+
31
+
32
+ ### Run GRPO training
33
+ ```bash
34
+ bash train_grpo.sh
35
+ ```
36
+
37
+ ### Run evaluation
38
+ ```bash
39
+ bash evaluate.sh
40
+ ```
41
+
42
+ You can change ```$BASE_MODEL``` to the path of the model you would like to evaluate.
code/RL_model/verl/Search-R1/misc/scripts/nq_hotpotqa/data_process.sh ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ WORK_DIR=your/work/dir
2
+ LOCAL_DIR=$WORK_DIR/data/nq_hotpotqa_train
3
+
4
+ ## process multiple dataset search format train file
5
+ DATA=nq,hotpotqa
6
+ python $WORK_DIR/scripts/data_process/qa_search_train_merge.py --local_dir $LOCAL_DIR --data_sources $DATA
7
+
8
+ ## process multiple dataset search format test file
9
+ DATA=nq,triviaqa,popqa,hotpotqa,2wikimultihopqa,musique,bamboogle
10
+ python $WORK_DIR/scripts/data_process/qa_search_test_merge.py --local_dir $LOCAL_DIR --data_sources $DATA
code/RL_model/verl/Search-R1/misc/scripts/nq_hotpotqa/evaluate.sh ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ data_name=nq_hotpotqa_train
2
+
3
+ export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
4
+ export DATA_DIR=data/${data_name} # first download the data from https://huggingface.co/datasets/PeterJinGo/nq_hotpotqa_train
5
+
6
+ export BASE_MODEL=""
7
+
8
+ # set -x
9
+ export VLLM_ATTENTION_BACKEND=XFORMERS # vllm + qwen2-7b with flash_attn has some issues
10
+
11
+ # max_prompt_length = (config['training']['max_start_length'] + config['training']['max_response_length'] * (config['training']['max_turns'] - 1) + config['training']['max_obs_length'] * config['training']['max_turns'])
12
+
13
+ PYTHONUNBUFFERED=1 python3 -m verl.trainer.main_ppo \
14
+ data.train_files=$DATA_DIR/train.parquet \
15
+ data.val_files=$DATA_DIR/test.parquet \
16
+ data.train_data_num=null \
17
+ data.val_data_num=null \
18
+ data.train_batch_size=512 \
19
+ data.val_batch_size=256 \
20
+ data.max_prompt_length=4096 \
21
+ data.max_response_length=500 \
22
+ data.max_start_length=2048 \
23
+ data.max_obs_length=500 \
24
+ data.shuffle_train_dataloader=True \
25
+ algorithm.adv_estimator=gae \
26
+ actor_rollout_ref.model.path=$BASE_MODEL \
27
+ actor_rollout_ref.actor.optim.lr=1e-6 \
28
+ actor_rollout_ref.model.enable_gradient_checkpointing=true \
29
+ actor_rollout_ref.model.use_remove_padding=True \
30
+ actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.95 \
31
+ actor_rollout_ref.actor.ppo_mini_batch_size=256 \
32
+ actor_rollout_ref.actor.ppo_micro_batch_size=64 \
33
+ actor_rollout_ref.actor.fsdp_config.param_offload=true \
34
+ actor_rollout_ref.actor.fsdp_config.grad_offload=true \
35
+ actor_rollout_ref.actor.fsdp_config.optimizer_offload=true \
36
+ actor_rollout_ref.rollout.log_prob_micro_batch_size=128 \
37
+ actor_rollout_ref.rollout.tensor_model_parallel_size=1 \
38
+ actor_rollout_ref.rollout.name=vllm \
39
+ actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \
40
+ actor_rollout_ref.ref.log_prob_micro_batch_size=128 \
41
+ actor_rollout_ref.ref.fsdp_config.param_offload=True \
42
+ actor_rollout_ref.rollout.n_agent=1 \
43
+ actor_rollout_ref.rollout.temperature=1 \
44
+ actor_rollout_ref.actor.state_masking=true \
45
+ critic.optim.lr=1e-5 \
46
+ critic.model.use_remove_padding=True \
47
+ critic.optim.lr_warmup_steps_ratio=0.05 \
48
+ critic.model.path=$BASE_MODEL \
49
+ critic.model.enable_gradient_checkpointing=true \
50
+ critic.ppo_micro_batch_size=8 \
51
+ critic.model.fsdp_config.param_offload=true \
52
+ critic.model.fsdp_config.grad_offload=true \
53
+ critic.model.fsdp_config.optimizer_offload=true \
54
+ algorithm.kl_ctrl.kl_coef=0.001 \
55
+ algorithm.no_think_rl=false \
56
+ trainer.critic_warmup=0 \
57
+ trainer.logger=[] \
58
+ +trainer.val_only=true \
59
+ +trainer.val_before_train=true \
60
+ trainer.default_hdfs_dir=null \
61
+ trainer.n_gpus_per_node=8 \
62
+ trainer.nnodes=1 \
63
+ max_turns=4 \
64
+ retriever.url="http://127.0.0.1:8000/retrieve" \
65
+ retriever.topk=3
code/RL_model/verl/Search-R1/misc/scripts/nq_hotpotqa/v0.1/train_grpo.sh ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ data_name=nq_hotpotqa_train
2
+
3
+ export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
4
+ export DATA_DIR=data/${data_name} # first download the data from https://huggingface.co/datasets/PeterJinGo/nq_hotpotqa_train
5
+
6
+ WAND_PROJECT="Search-R1"
7
+
8
+ export BASE_MODEL='meta-llama/Llama-3.2-3B'
9
+ export EXPERIMENT_NAME=${data_name}-search-r1-grpo-llama3.2-3b-em
10
+ # export BASE_MODEL='meta-llama/Llama-3.2-3B-Instruct'
11
+ # export EXPERIMENT_NAME=${data_name}-search-r1-grpo-llama3.2-3b-it-em
12
+ # export BASE_MODEL='meta-llama/Llama-3.1-8B'
13
+ # export EXPERIMENT_NAME=${data_name}-search-r1-grpo-llama3.1-8b-em
14
+ # export BASE_MODEL='meta-llama/Llama-3.1-8B-Instruct'
15
+ # export EXPERIMENT_NAME=${data_name}-search-r1-grpo-llama3.1-8b-it-em
16
+
17
+ # export BASE_MODEL='Qwen/Qwen2.5-3B'
18
+ # export EXPERIMENT_NAME=${data_name}-search-r1-grpo-qwen2.5-3b-em
19
+ # export BASE_MODEL='Qwen/Qwen2.5-3B-Instruct'
20
+ # export EXPERIMENT_NAME=${data_name}-search-r1-grpo-qwen2.5-3b-it-em
21
+ # export BASE_MODEL='Qwen/Qwen2.5-7B'
22
+ # export EXPERIMENT_NAME=${data_name}-search-r1-grpo-qwen2.5-7b-em
23
+ # export BASE_MODEL='Qwen/Qwen2.5-7B-Instruct'
24
+ # export EXPERIMENT_NAME=${data_name}-search-r1-grpo-qwen2.5-7b-it-em
25
+
26
+ # set -x
27
+ export VLLM_ATTENTION_BACKEND=XFORMERS # vllm + qwen2-7b with flash_attn has some issues
28
+
29
+ # max_prompt_length = (config['training']['max_start_length'] + config['training']['max_response_length'] * (config['training']['max_turns'] - 1) + config['training']['max_obs_length'] * config['training']['max_turns'])
30
+
31
+ PYTHONUNBUFFERED=1 python3 -m verl.trainer.main_ppo \
32
+ data.train_files=$DATA_DIR/train.parquet \
33
+ data.val_files=$DATA_DIR/test.parquet \
34
+ data.train_data_num=null \
35
+ data.val_data_num=null \
36
+ data.train_batch_size=512 \
37
+ data.val_batch_size=256 \
38
+ data.max_prompt_length=4096 \
39
+ data.max_response_length=500 \
40
+ data.max_start_length=2048 \
41
+ data.max_obs_length=500 \
42
+ data.shuffle_train_dataloader=True \
43
+ algorithm.adv_estimator=grpo \
44
+ actor_rollout_ref.model.path=$BASE_MODEL \
45
+ actor_rollout_ref.model.enable_gradient_checkpointing=true \
46
+ actor_rollout_ref.model.use_remove_padding=True \
47
+ actor_rollout_ref.actor.optim.lr=1e-6 \
48
+ actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.95 \
49
+ actor_rollout_ref.actor.use_kl_loss=true \
50
+ actor_rollout_ref.actor.ppo_mini_batch_size=256 \
51
+ actor_rollout_ref.actor.ppo_micro_batch_size=64 \
52
+ actor_rollout_ref.actor.fsdp_config.param_offload=true \
53
+ actor_rollout_ref.actor.fsdp_config.grad_offload=true \
54
+ actor_rollout_ref.actor.fsdp_config.optimizer_offload=true \
55
+ actor_rollout_ref.rollout.log_prob_micro_batch_size=128 \
56
+ actor_rollout_ref.rollout.tensor_model_parallel_size=1 \
57
+ actor_rollout_ref.rollout.name=vllm \
58
+ actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \
59
+ actor_rollout_ref.ref.log_prob_micro_batch_size=128 \
60
+ actor_rollout_ref.ref.fsdp_config.param_offload=True \
61
+ actor_rollout_ref.actor.kl_loss_coef=0.001 \
62
+ actor_rollout_ref.actor.kl_loss_type=low_var_kl \
63
+ algorithm.no_think_rl=false \
64
+ actor_rollout_ref.rollout.n_agent=5 \
65
+ actor_rollout_ref.rollout.temperature=1 \
66
+ actor_rollout_ref.actor.state_masking=true \
67
+ trainer.logger=['wandb'] \
68
+ +trainer.val_only=false \
69
+ +trainer.val_before_train=true \
70
+ trainer.default_hdfs_dir=null \
71
+ trainer.n_gpus_per_node=8 \
72
+ trainer.nnodes=1 \
73
+ trainer.save_freq=100 \
74
+ trainer.test_freq=50 \
75
+ trainer.project_name=$WAND_PROJECT \
76
+ trainer.experiment_name=$EXPERIMENT_NAME \
77
+ trainer.total_epochs=15 \
78
+ trainer.total_training_steps=305 \
79
+ trainer.default_hdfs_dir=null \
80
+ trainer.default_local_dir=verl_checkpoints/$EXPERIMENT_NAME \
81
+ max_turns=4 \
82
+ retriever.url="http://127.0.0.1:8000/retrieve" \
83
+ retriever.topk=3 \
84
+ 2>&1 | tee $EXPERIMENT_NAME.log
code/RL_model/verl/Search-R1/misc/scripts/nq_hotpotqa/v0.1/train_ppo.sh ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ data_name=nq_hotpotqa_train
2
+
3
+ export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
4
+ export DATA_DIR=data/${data_name} # first download the data from https://huggingface.co/datasets/PeterJinGo/nq_hotpotqa_train
5
+
6
+ WAND_PROJECT="Search-R1"
7
+
8
+ export BASE_MODEL='meta-llama/Llama-3.2-3B'
9
+ export EXPERIMENT_NAME=${data_name}-search-r1-ppo-llama3.2-3b-em
10
+ # export BASE_MODEL='meta-llama/Llama-3.2-3B-Instruct'
11
+ # export EXPERIMENT_NAME=${data_name}-search-r1-ppo-llama3.2-3b-it-em
12
+ # export BASE_MODEL='meta-llama/Llama-3.1-8B'
13
+ # export EXPERIMENT_NAME=${data_name}-search-r1-ppo-llama3.1-8b-em
14
+ # export BASE_MODEL='meta-llama/Llama-3.1-8B-Instruct'
15
+ # export EXPERIMENT_NAME=${data_name}-search-r1-ppo-llama3.1-8b-it-em
16
+
17
+ # export BASE_MODEL='Qwen/Qwen2.5-3B'
18
+ # export EXPERIMENT_NAME=${data_name}-search-r1-ppo-qwen2.5-3b-em
19
+ # export BASE_MODEL='Qwen/Qwen2.5-3B-Instruct'
20
+ # export EXPERIMENT_NAME=${data_name}-search-r1-ppo-qwen2.5-3b-it-em
21
+ # export BASE_MODEL='Qwen/Qwen2.5-7B'
22
+ # export EXPERIMENT_NAME=${data_name}-search-r1-ppo-qwen2.5-7b-em
23
+ # export BASE_MODEL='Qwen/Qwen2.5-7B-Instruct'
24
+ # export EXPERIMENT_NAME=${data_name}-search-r1-ppo-qwen2.5-7b-it-em
25
+
26
+ # set -x
27
+ export VLLM_ATTENTION_BACKEND=XFORMERS # vllm + qwen2-7b with flash_attn has some issues
28
+
29
+ # max_prompt_length = (config['training']['max_start_length'] + config['training']['max_response_length'] * (config['training']['max_turns'] - 1) + config['training']['max_obs_length'] * config['training']['max_turns'])
30
+
31
+ PYTHONUNBUFFERED=1 python3 -m verl.trainer.main_ppo \
32
+ data.train_files=$DATA_DIR/train.parquet \
33
+ data.val_files=$DATA_DIR/test.parquet \
34
+ data.train_data_num=null \
35
+ data.val_data_num=null \
36
+ data.train_batch_size=512 \
37
+ data.val_batch_size=256 \
38
+ data.max_prompt_length=4096 \
39
+ data.max_response_length=500 \
40
+ data.max_start_length=2048 \
41
+ data.max_obs_length=500 \
42
+ data.shuffle_train_dataloader=True \
43
+ algorithm.adv_estimator=gae \
44
+ actor_rollout_ref.model.path=$BASE_MODEL \
45
+ actor_rollout_ref.actor.optim.lr=1e-6 \
46
+ actor_rollout_ref.model.enable_gradient_checkpointing=true \
47
+ actor_rollout_ref.model.use_remove_padding=True \
48
+ actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.95 \
49
+ actor_rollout_ref.actor.ppo_mini_batch_size=256 \
50
+ actor_rollout_ref.actor.ppo_micro_batch_size=64 \
51
+ actor_rollout_ref.actor.fsdp_config.param_offload=true \
52
+ actor_rollout_ref.actor.fsdp_config.grad_offload=true \
53
+ actor_rollout_ref.actor.fsdp_config.optimizer_offload=true \
54
+ actor_rollout_ref.rollout.log_prob_micro_batch_size=128 \
55
+ actor_rollout_ref.rollout.tensor_model_parallel_size=1 \
56
+ actor_rollout_ref.rollout.name=vllm \
57
+ actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \
58
+ actor_rollout_ref.ref.log_prob_micro_batch_size=128 \
59
+ actor_rollout_ref.ref.fsdp_config.param_offload=True \
60
+ actor_rollout_ref.rollout.n_agent=1 \
61
+ actor_rollout_ref.rollout.temperature=1 \
62
+ actor_rollout_ref.actor.state_masking=true \
63
+ critic.optim.lr=1e-5 \
64
+ critic.model.use_remove_padding=True \
65
+ critic.optim.lr_warmup_steps_ratio=0.05 \
66
+ critic.model.path=$BASE_MODEL \
67
+ critic.model.enable_gradient_checkpointing=true \
68
+ critic.ppo_micro_batch_size=8 \
69
+ critic.model.fsdp_config.param_offload=true \
70
+ critic.model.fsdp_config.grad_offload=true \
71
+ critic.model.fsdp_config.optimizer_offload=true \
72
+ algorithm.kl_ctrl.kl_coef=0.001 \
73
+ algorithm.no_think_rl=false \
74
+ trainer.critic_warmup=0 \
75
+ trainer.logger=['wandb'] \
76
+ +trainer.val_only=false \
77
+ +trainer.val_before_train=true \
78
+ trainer.default_hdfs_dir=null \
79
+ trainer.n_gpus_per_node=8 \
80
+ trainer.nnodes=1 \
81
+ trainer.save_freq=100 \
82
+ trainer.test_freq=50 \
83
+ trainer.project_name=$WAND_PROJECT \
84
+ trainer.experiment_name=$EXPERIMENT_NAME \
85
+ trainer.total_epochs=15 \
86
+ trainer.total_training_steps=305 \
87
+ trainer.default_hdfs_dir=null \
88
+ trainer.default_local_dir=verl_checkpoints/$EXPERIMENT_NAME \
89
+ max_turns=4 \
90
+ retriever.url="http://127.0.0.1:8000/retrieve" \
91
+ retriever.topk=3 \
92
+ 2>&1 | tee $EXPERIMENT_NAME.log
code/RL_model/verl/Search-R1/misc/scripts/nq_hotpotqa/v0.2/train_grpo.sh ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ data_name=nq_hotpotqa_train
2
+
3
+ export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
4
+ export DATA_DIR=data/${data_name} # first download the data from https://huggingface.co/datasets/PeterJinGo/nq_hotpotqa_train
5
+
6
+ WAND_PROJECT="Search-R1"
7
+
8
+ # export BASE_MODEL='Qwen/Qwen2.5-3B'
9
+ # export EXPERIMENT_NAME=${data_name}-search-r1-grpo-qwen2.5-3b-em
10
+ # export BASE_MODEL='Qwen/Qwen2.5-3B-Instruct'
11
+ # export EXPERIMENT_NAME=${data_name}-search-r1-grpo-qwen2.5-3b-it-em
12
+ export BASE_MODEL='Qwen/Qwen2.5-7B'
13
+ export EXPERIMENT_NAME=${data_name}-search-r1-grpo-qwen2.5-7b-em
14
+ # export BASE_MODEL='Qwen/Qwen2.5-7B-Instruct'
15
+ # export EXPERIMENT_NAME=${data_name}-search-r1-grpo-qwen2.5-7b-it-em
16
+ # export BASE_MODEL='Qwen/Qwen2.5-14B'
17
+ # export EXPERIMENT_NAME=${data_name}-search-r1-grpo-qwen2.5-14b-em
18
+ # export BASE_MODEL='Qwen/Qwen2.5-14B-Instruct'
19
+ # export EXPERIMENT_NAME=${data_name}-search-r1-grpo-qwen2.5-14b-it-em
20
+
21
+ # set -x
22
+ export VLLM_ATTENTION_BACKEND=XFORMERS # vllm + qwen2-7b with flash_attn has some issues
23
+
24
+ # max_prompt_length = (config['training']['max_start_length'] + config['training']['max_response_length'] * (config['training']['max_turns'] - 1) + config['training']['max_obs_length'] * config['training']['max_turns'])
25
+
26
+ PYTHONUNBUFFERED=1 python3 -m verl.trainer.main_ppo \
27
+ data.train_files=$DATA_DIR/train.parquet \
28
+ data.val_files=$DATA_DIR/test.parquet \
29
+ data.train_data_num=null \
30
+ data.val_data_num=null \
31
+ data.train_batch_size=512 \
32
+ data.val_batch_size=256 \
33
+ data.max_prompt_length=4096 \
34
+ data.max_response_length=500 \
35
+ data.max_start_length=2048 \
36
+ data.max_obs_length=500 \
37
+ data.shuffle_train_dataloader=True \
38
+ algorithm.adv_estimator=grpo \
39
+ actor_rollout_ref.model.path=$BASE_MODEL \
40
+ actor_rollout_ref.model.enable_gradient_checkpointing=true \
41
+ actor_rollout_ref.model.use_remove_padding=True \
42
+ actor_rollout_ref.actor.optim.lr=1e-6 \
43
+ actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.285 \
44
+ actor_rollout_ref.actor.use_kl_loss=true \
45
+ actor_rollout_ref.actor.ppo_mini_batch_size=256 \
46
+ actor_rollout_ref.actor.ppo_micro_batch_size=64 \
47
+ actor_rollout_ref.actor.fsdp_config.param_offload=true \
48
+ actor_rollout_ref.actor.fsdp_config.grad_offload=true \
49
+ actor_rollout_ref.actor.fsdp_config.optimizer_offload=true \
50
+ actor_rollout_ref.rollout.log_prob_micro_batch_size=128 \
51
+ actor_rollout_ref.rollout.tensor_model_parallel_size=1 \
52
+ actor_rollout_ref.rollout.name=vllm \
53
+ actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \
54
+ actor_rollout_ref.ref.log_prob_micro_batch_size=128 \
55
+ actor_rollout_ref.ref.fsdp_config.param_offload=True \
56
+ actor_rollout_ref.actor.kl_loss_coef=0.001 \
57
+ actor_rollout_ref.actor.kl_loss_type=low_var_kl \
58
+ algorithm.no_think_rl=false \
59
+ actor_rollout_ref.rollout.n_agent=5 \
60
+ actor_rollout_ref.rollout.temperature=1 \
61
+ actor_rollout_ref.actor.state_masking=true \
62
+ trainer.logger=['wandb'] \
63
+ +trainer.val_only=false \
64
+ +trainer.val_before_train=true \
65
+ trainer.default_hdfs_dir=null \
66
+ trainer.n_gpus_per_node=8 \
67
+ trainer.nnodes=1 \
68
+ trainer.save_freq=100 \
69
+ trainer.test_freq=100 \
70
+ trainer.project_name=$WAND_PROJECT \
71
+ trainer.experiment_name=$EXPERIMENT_NAME \
72
+ trainer.total_epochs=15 \
73
+ trainer.total_training_steps=1005 \
74
+ trainer.default_hdfs_dir=null \
75
+ trainer.default_local_dir=verl_checkpoints/$EXPERIMENT_NAME \
76
+ max_turns=4 \
77
+ retriever.url="http://127.0.0.1:8000/retrieve" \
78
+ retriever.topk=3 \
79
+ 2>&1 | tee $EXPERIMENT_NAME.log
code/RL_model/verl/Search-R1/misc/scripts/nq_hotpotqa/v0.2/train_ppo.sh ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ data_name=nq_hotpotqa_train
2
+
3
+ export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
4
+ export DATA_DIR=data/${data_name} # first download the data from https://huggingface.co/datasets/PeterJinGo/nq_hotpotqa_train
5
+
6
+ WAND_PROJECT="Search-R1"
7
+
8
+ # export BASE_MODEL='Qwen/Qwen2.5-3B'
9
+ # export EXPERIMENT_NAME=${data_name}-search-r1-ppo-qwen2.5-3b-em
10
+ # export BASE_MODEL='Qwen/Qwen2.5-3B-Instruct'
11
+ # export EXPERIMENT_NAME=${data_name}-search-r1-ppo-qwen2.5-3b-it-em
12
+ export BASE_MODEL='Qwen/Qwen2.5-7B'
13
+ export EXPERIMENT_NAME=${data_name}-search-r1-ppo-qwen2.5-7b-em
14
+ # export BASE_MODEL='Qwen/Qwen2.5-7B-Instruct'
15
+ # export EXPERIMENT_NAME=${data_name}-search-r1-ppo-qwen2.5-7b-it-em
16
+ # export BASE_MODEL='Qwen/Qwen2.5-14B'
17
+ # export EXPERIMENT_NAME=${data_name}-search-r1-ppo-qwen2.5-14b-em
18
+ # export BASE_MODEL='Qwen/Qwen2.5-14B-Instruct'
19
+ # export EXPERIMENT_NAME=${data_name}-search-r1-ppo-qwen2.5-14b-it-em
20
+
21
+ # set -x
22
+ export VLLM_ATTENTION_BACKEND=XFORMERS # vllm + qwen2-7b with flash_attn has some issues
23
+
24
+ # max_prompt_length = (config['training']['max_start_length'] + config['training']['max_response_length'] * (config['training']['max_turns'] - 1) + config['training']['max_obs_length'] * config['training']['max_turns'])
25
+
26
+ PYTHONUNBUFFERED=1 python3 -m verl.trainer.main_ppo \
27
+ data.train_files=$DATA_DIR/train.parquet \
28
+ data.val_files=$DATA_DIR/test.parquet \
29
+ data.train_data_num=null \
30
+ data.val_data_num=null \
31
+ data.train_batch_size=512 \
32
+ data.val_batch_size=256 \
33
+ data.max_prompt_length=4096 \
34
+ data.max_response_length=500 \
35
+ data.max_start_length=2048 \
36
+ data.max_obs_length=500 \
37
+ data.shuffle_train_dataloader=True \
38
+ algorithm.adv_estimator=gae \
39
+ actor_rollout_ref.model.path=$BASE_MODEL \
40
+ actor_rollout_ref.actor.optim.lr=1e-6 \
41
+ actor_rollout_ref.model.enable_gradient_checkpointing=true \
42
+ actor_rollout_ref.model.use_remove_padding=True \
43
+ actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.285 \
44
+ actor_rollout_ref.actor.ppo_mini_batch_size=256 \
45
+ actor_rollout_ref.actor.ppo_micro_batch_size=64 \
46
+ actor_rollout_ref.actor.fsdp_config.param_offload=true \
47
+ actor_rollout_ref.actor.fsdp_config.grad_offload=true \
48
+ actor_rollout_ref.actor.fsdp_config.optimizer_offload=true \
49
+ actor_rollout_ref.rollout.log_prob_micro_batch_size=128 \
50
+ actor_rollout_ref.rollout.tensor_model_parallel_size=1 \
51
+ actor_rollout_ref.rollout.name=vllm \
52
+ actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \
53
+ actor_rollout_ref.ref.log_prob_micro_batch_size=128 \
54
+ actor_rollout_ref.ref.fsdp_config.param_offload=True \
55
+ actor_rollout_ref.rollout.n_agent=1 \
56
+ actor_rollout_ref.rollout.temperature=1 \
57
+ actor_rollout_ref.rollout.top_p=1.0 \
58
+ actor_rollout_ref.actor.state_masking=true \
59
+ critic.optim.lr=1e-5 \
60
+ critic.model.use_remove_padding=True \
61
+ critic.optim.lr_warmup_steps_ratio=0.015 \
62
+ critic.model.path=$BASE_MODEL \
63
+ critic.model.enable_gradient_checkpointing=true \
64
+ critic.ppo_micro_batch_size=8 \
65
+ critic.model.fsdp_config.param_offload=true \
66
+ critic.model.fsdp_config.grad_offload=true \
67
+ critic.model.fsdp_config.optimizer_offload=true \
68
+ algorithm.kl_ctrl.kl_coef=0.001 \
69
+ algorithm.no_think_rl=false \
70
+ trainer.critic_warmup=0 \
71
+ trainer.logger=['wandb'] \
72
+ +trainer.val_only=false \
73
+ +trainer.val_before_train=true \
74
+ trainer.default_hdfs_dir=null \
75
+ trainer.n_gpus_per_node=8 \
76
+ trainer.nnodes=1 \
77
+ trainer.save_freq=100 \
78
+ trainer.test_freq=100 \
79
+ trainer.project_name=$WAND_PROJECT \
80
+ trainer.experiment_name=$EXPERIMENT_NAME \
81
+ trainer.total_epochs=15 \
82
+ trainer.total_training_steps=1005 \
83
+ trainer.default_hdfs_dir=null \
84
+ trainer.default_local_dir=verl_checkpoints/$EXPERIMENT_NAME \
85
+ max_turns=4 \
86
+ retriever.url="http://127.0.0.1:8000/retrieve" \
87
+ retriever.topk=3 \
88
+ 2>&1 | tee $EXPERIMENT_NAME.log
code/RL_model/verl/Search-R1/misc/scripts/nq_hotpotqa/v0.3/train_grpo_format.sh ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ data_name=nq_hotpotqa_train
2
+
3
+ export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
4
+ export DATA_DIR=data/${data_name} # first download the data from https://huggingface.co/datasets/PeterJinGo/nq_hotpotqa_train
5
+
6
+ WAND_PROJECT="Search-R1"
7
+
8
+ export BASE_MODEL='Qwen/Qwen2.5-3B'
9
+ export EXPERIMENT_NAME=${data_name}-search-r1-grpo-qwen2.5-3b-em-structureformat
10
+ # export BASE_MODEL='Qwen/Qwen2.5-3B-Instruct'
11
+ # export EXPERIMENT_NAME=${data_name}-search-r1-grpo-qwen2.5-3b-it-em-structureformat
12
+ # export BASE_MODEL='Qwen/Qwen2.5-7B'
13
+ # export EXPERIMENT_NAME=${data_name}-search-r1-grpo-qwen2.5-7b-em-structureformat
14
+ # export BASE_MODEL='Qwen/Qwen2.5-7B-Instruct'
15
+ # export EXPERIMENT_NAME=${data_name}-search-r1-grpo-qwen2.5-7b-it-em-structureformat
16
+ # export BASE_MODEL='Qwen/Qwen2.5-14B'
17
+ # export EXPERIMENT_NAME=${data_name}-search-r1-grpo-qwen2.5-14b-em-structureformat
18
+ # export BASE_MODEL='Qwen/Qwen2.5-14B-Instruct'
19
+ # export EXPERIMENT_NAME=${data_name}-search-r1-grpo-qwen2.5-14b-it-em-structureformat
20
+
21
+ # export BASE_MODEL='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B'
22
+ # export EXPERIMENT_NAME=${data_name}-search-r1-grpo-deepseekr1-7b-em-structureformat
23
+ # export BASE_MODEL='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B'
24
+ # export EXPERIMENT_NAME=${data_name}-search-r1-grpo-deepseekr1-14b-em-structureformat
25
+
26
+ # set -x
27
+ export VLLM_ATTENTION_BACKEND=XFORMERS # vllm + qwen2-7b with flash_attn has some issues
28
+
29
+ # max_prompt_length = (config['training']['max_start_length'] + config['training']['max_response_length'] * (config['training']['max_turns'] - 1) + config['training']['max_obs_length'] * config['training']['max_turns'])
30
+
31
+ PYTHONUNBUFFERED=1 python3 -m verl.trainer.main_ppo_format \
32
+ data.train_files=$DATA_DIR/train.parquet \
33
+ data.val_files=$DATA_DIR/test.parquet \
34
+ data.train_data_num=null \
35
+ data.val_data_num=null \
36
+ data.train_batch_size=512 \
37
+ data.val_batch_size=256 \
38
+ data.max_prompt_length=4096 \
39
+ data.max_response_length=500 \
40
+ data.max_start_length=2048 \
41
+ data.max_obs_length=500 \
42
+ data.shuffle_train_dataloader=True \
43
+ algorithm.adv_estimator=grpo \
44
+ actor_rollout_ref.model.path=$BASE_MODEL \
45
+ actor_rollout_ref.model.enable_gradient_checkpointing=true \
46
+ actor_rollout_ref.model.use_remove_padding=True \
47
+ actor_rollout_ref.actor.optim.lr=5e-7 \
48
+ actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.285 \
49
+ actor_rollout_ref.actor.use_kl_loss=true \
50
+ actor_rollout_ref.actor.ppo_mini_batch_size=256 \
51
+ actor_rollout_ref.actor.ppo_micro_batch_size=64 \
52
+ actor_rollout_ref.actor.fsdp_config.param_offload=true \
53
+ actor_rollout_ref.actor.fsdp_config.grad_offload=true \
54
+ actor_rollout_ref.actor.fsdp_config.optimizer_offload=true \
55
+ actor_rollout_ref.rollout.log_prob_micro_batch_size=128 \
56
+ actor_rollout_ref.rollout.tensor_model_parallel_size=1 \
57
+ actor_rollout_ref.rollout.name=vllm \
58
+ actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \
59
+ actor_rollout_ref.ref.log_prob_micro_batch_size=128 \
60
+ actor_rollout_ref.ref.fsdp_config.param_offload=True \
61
+ actor_rollout_ref.actor.kl_loss_coef=0.001 \
62
+ actor_rollout_ref.actor.kl_loss_type=low_var_kl \
63
+ algorithm.no_think_rl=false \
64
+ actor_rollout_ref.rollout.n_agent=5 \
65
+ actor_rollout_ref.rollout.temperature=1 \
66
+ actor_rollout_ref.actor.state_masking=true \
67
+ trainer.logger=['wandb'] \
68
+ +trainer.val_only=false \
69
+ +trainer.val_before_train=true \
70
+ trainer.default_hdfs_dir=null \
71
+ trainer.n_gpus_per_node=8 \
72
+ trainer.nnodes=1 \
73
+ trainer.save_freq=100 \
74
+ trainer.test_freq=100 \
75
+ trainer.project_name=$WAND_PROJECT \
76
+ trainer.experiment_name=$EXPERIMENT_NAME \
77
+ trainer.total_epochs=15 \
78
+ trainer.total_training_steps=1005 \
79
+ trainer.default_hdfs_dir=null \
80
+ trainer.default_local_dir=/home/peterjin/verl_checkpoints/$EXPERIMENT_NAME \
81
+ reward_model.structure_format_score=0.2 \
82
+ reward_model.final_format_score=0.1 \
83
+ reward_model.retrieval_score=0 \
84
+ max_turns=4 \
85
+ retriever.url="http://127.0.0.1:8000/retrieve" \
86
+ retriever.topk=3 \
87
+ 2>&1 | tee /home/peterjin/rl_logs/$EXPERIMENT_NAME.log
code/RL_model/verl/Search-R1/misc/scripts/nq_hotpotqa/v0.3/train_ppo_format.sh ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ data_name=nq_hotpotqa_train
2
+
3
+ export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
4
+ export DATA_DIR=data/${data_name} # first download the data from https://huggingface.co/datasets/PeterJinGo/nq_hotpotqa_train
5
+
6
+ WAND_PROJECT="Search-R1"
7
+
8
+ export BASE_MODEL='Qwen/Qwen2.5-3B'
9
+ export EXPERIMENT_NAME=${data_name}-search-r1-ppo-qwen2.5-3b-em-structureformat
10
+ # export BASE_MODEL='Qwen/Qwen2.5-3B-Instruct'
11
+ # export EXPERIMENT_NAME=${data_name}-search-r1-ppo-qwen2.5-3b-it-em-structureformat
12
+ # export BASE_MODEL='Qwen/Qwen2.5-7B'
13
+ # export EXPERIMENT_NAME=${data_name}-search-r1-ppo-qwen2.5-7b-em-structureformat
14
+ # export BASE_MODEL='Qwen/Qwen2.5-7B-Instruct'
15
+ # export EXPERIMENT_NAME=${data_name}-search-r1-ppo-qwen2.5-7b-it-em-structureformat
16
+ # export BASE_MODEL='Qwen/Qwen2.5-14B'
17
+ # export EXPERIMENT_NAME=${data_name}-search-r1-grpo-qwen2.5-14b-em-structureformat
18
+ # export BASE_MODEL='Qwen/Qwen2.5-14B-Instruct'
19
+ # export EXPERIMENT_NAME=${data_name}-search-r1-grpo-qwen2.5-14b-it-em-structureformat
20
+
21
+ # export BASE_MODEL='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B'
22
+ # export EXPERIMENT_NAME=${data_name}-search-r1-ppo-deepseekr1-14b-em-structureformat
23
+
24
+ # set -x
25
+ export VLLM_ATTENTION_BACKEND=XFORMERS # vllm + qwen2-7b with flash_attn has some issues
26
+
27
+ # max_prompt_length = (config['training']['max_start_length'] + config['training']['max_response_length'] * (config['training']['max_turns'] - 1) + config['training']['max_obs_length'] * config['training']['max_turns'])
28
+
29
+ PYTHONUNBUFFERED=1 python3 -m verl.trainer.main_ppo_format \
30
+ data.train_files=$DATA_DIR/train.parquet \
31
+ data.val_files=$DATA_DIR/test.parquet \
32
+ data.train_data_num=null \
33
+ data.val_data_num=null \
34
+ data.train_batch_size=512 \
35
+ data.val_batch_size=256 \
36
+ data.max_prompt_length=4096 \
37
+ data.max_response_length=500 \
38
+ data.max_start_length=2048 \
39
+ data.max_obs_length=500 \
40
+ data.shuffle_train_dataloader=True \
41
+ algorithm.adv_estimator=gae \
42
+ actor_rollout_ref.model.path=$BASE_MODEL \
43
+ actor_rollout_ref.actor.optim.lr=1e-6 \
44
+ actor_rollout_ref.model.enable_gradient_checkpointing=true \
45
+ actor_rollout_ref.model.use_remove_padding=True \
46
+ actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.285 \
47
+ actor_rollout_ref.actor.ppo_mini_batch_size=256 \
48
+ actor_rollout_ref.actor.ppo_micro_batch_size=64 \
49
+ actor_rollout_ref.actor.fsdp_config.param_offload=true \
50
+ actor_rollout_ref.actor.fsdp_config.grad_offload=true \
51
+ actor_rollout_ref.actor.fsdp_config.optimizer_offload=true \
52
+ actor_rollout_ref.rollout.log_prob_micro_batch_size=128 \
53
+ actor_rollout_ref.rollout.tensor_model_parallel_size=1 \
54
+ actor_rollout_ref.rollout.name=vllm \
55
+ actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \
56
+ actor_rollout_ref.ref.log_prob_micro_batch_size=128 \
57
+ actor_rollout_ref.ref.fsdp_config.param_offload=True \
58
+ actor_rollout_ref.rollout.n_agent=1 \
59
+ actor_rollout_ref.rollout.temperature=1 \
60
+ actor_rollout_ref.rollout.top_p=1.0 \
61
+ actor_rollout_ref.actor.state_masking=true \
62
+ critic.optim.lr=1e-5 \
63
+ critic.model.use_remove_padding=True \
64
+ critic.optim.lr_warmup_steps_ratio=0.015 \
65
+ critic.model.path=$BASE_MODEL \
66
+ critic.model.enable_gradient_checkpointing=true \
67
+ critic.ppo_micro_batch_size=8 \
68
+ critic.model.fsdp_config.param_offload=true \
69
+ critic.model.fsdp_config.grad_offload=true \
70
+ critic.model.fsdp_config.optimizer_offload=true \
71
+ algorithm.kl_ctrl.kl_coef=0.001 \
72
+ algorithm.no_think_rl=false \
73
+ trainer.critic_warmup=0 \
74
+ trainer.logger=['wandb'] \
75
+ +trainer.val_only=false \
76
+ +trainer.val_before_train=true \
77
+ trainer.default_hdfs_dir=null \
78
+ trainer.n_gpus_per_node=8 \
79
+ trainer.nnodes=1 \
80
+ trainer.save_freq=100 \
81
+ trainer.test_freq=100 \
82
+ trainer.project_name=$WAND_PROJECT \
83
+ trainer.experiment_name=$EXPERIMENT_NAME \
84
+ trainer.total_epochs=15 \
85
+ trainer.total_training_steps=1005 \
86
+ trainer.default_hdfs_dir=null \
87
+ trainer.default_local_dir=/home/peterjin/verl_checkpoints/$EXPERIMENT_NAME \
88
+ reward_model.structure_format_score=0.2 \
89
+ reward_model.final_format_score=0.1 \
90
+ reward_model.retrieval_score=0 \
91
+ max_turns=4 \
92
+ retriever.url="http://127.0.0.1:8000/retrieve" \
93
+ retriever.topk=3 \
94
+ 2>&1 | tee /home/peterjin/rl_logs/$EXPERIMENT_NAME.log
code/RL_model/verl/Search-R1/misc/scripts/upload.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from huggingface_hub import upload_file
3
+
4
+ repo_id = "PeterJinGo/wiki-18-e5-index"
5
+ path = "/home/peterjin/mnt/index/wiki-18"
6
+ for file in ["part_aa", "part_ab"]:
7
+ upload_file(
8
+ path_or_fileobj=os.path.join(path, file), # File path
9
+ path_in_repo=file, # Destination filename in the repo
10
+ repo_id=repo_id, # Your dataset repo ID
11
+ repo_type="dataset"
12
+ )
code/RL_model/verl/Search-R1/misc/scripts/upload.sh ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+
2
+ index=/home/peterjin/mnt/index/wiki-18/e5_Flat.index
3
+
4
+ split -b 40G $index part_
5
+
6
+ python upload.py
code/RL_model/verl/Search-R1/verl/models/README.md ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Models
2
+ Common model zoos such as huggingface/transformers struggle when using PyTorch native model parallelism. Following the design principle of vLLM, we keep a simple, parallelizable, highly-optimized model implementation with packed inputs in verl.
3
+ ## Adding a New Huggingface Model
4
+ ### Step 1: Copy the model file from HF to verl
5
+ - Add a new file under verl/models/hf
6
+ - Copy ONLY the model file from huggingface/transformers/models to verl/models/hf
7
+
8
+ ### Step 2: Modify the model file to use packed inputs
9
+ - Remove all the code related to inference (kv cache)
10
+ - Modify the inputs to include only
11
+ - input_ids (total_nnz,)
12
+ - cu_seqlens (total_nnz + 1,)
13
+ - max_seqlen_in_batch: int
14
+ - Note that this requires using flash attention with causal mask.
15
+
16
+ ### Step 2.5: Add tests
17
+ - Add a test to compare this version and the huggingface version
18
+ - Following the infrastructure and add tests to tests/models/hf
19
+
20
+ ### Step 3: Add a function to apply tensor parallelism
21
+ - Please follow
22
+ - https://pytorch.org/docs/stable/distributed.tensor.parallel.html
23
+ - https://pytorch.org/tutorials/intermediate/TP_tutorial.html
24
+ - General comments
25
+ - Tensor Parallelism in native PyTorch is NOT auto-parallelism. The way it works is to specify, via configs, how model parameters and inputs/outputs are resharded. These configs are then registered as hooks to perform input/output resharding before/after the model forward pass.
26
+
27
+ ### Step 4: Add a function to apply data parallelism
28
+ - Please use FSDP2 APIs
29
+ - See demo here https://github.com/pytorch/torchtitan/blob/main/torchtitan/parallelisms/parallelize_llama.py#L413
30
+
31
+ ### Step 5: Add a function to apply pipeline parallelism
32
+ - Comes in PyTorch 2.4
33
+ - Currently only in alpha in nightly version
34
+ - Check torchtitan for more details
35
+
code/RL_model/verl/Search-R1/verl/models/__init__.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2024 Bytedance Ltd. and/or its affiliates
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
code/RL_model/verl/Search-R1/verl/models/llama/__init__.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2024 Bytedance Ltd. and/or its affiliates
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.