| | --- |
| | license: apache-2.0 |
| | base_model: EssentialAI/rnj-1-instruct |
| | --- |
| | |
| | # hex-toolcall-base-v2 |
| |
|
| | Work-in-progress (WIP) checkpoint derived from [EssentialAI/rnj-1-instruct](https://huggingface.co/EssentialAI/rnj-1-instruct). |
| |
|
| | This checkpoint serves as a baseline for tool-calling experiments. |
| |
|
| | ## Training Config |
| |
|
| | ```toml |
| | max_steps = 140 |
| | seq_len = 8192 |
| | |
| | [model] |
| | name = "EssentialAI/rnj-1-instruct" |
| | |
| | [wandb] |
| | project = "xml-tool-thinking" |
| | name = "hex-v6-bs512-rollouts16" |
| | |
| | [orchestrator.wandb.log_extras] |
| | samples = true |
| | interval = 1 |
| | |
| | [orchestrator] |
| | batch_size = 512 |
| | rollouts_per_example = 16 |
| | |
| | [orchestrator.sampling] |
| | max_tokens = 512 |
| | temperature = 1.0 |
| | |
| | [[orchestrator.env]] |
| | id = "hex_encode_xml" |
| | args = { max_turns = 5, max_chunk = 128, strict_format = true, user_prompt = """Here is a document with semantic XML tags: |
| | |
| | {doc} |
| | |
| | Your task: Encode the content of the <{tag_name}> tag to hexadecimal. |
| | |
| | You must encode in chunks of at most {max_chunk_size} characters at a time. |
| | |
| | Available tools: |
| | - get_tag_content: Get the target tag's text and length. No arguments. |
| | - encode_chunk: Encode a character range to hex. Args: start (int), end (int) |
| | |
| | <format_rules> |
| | - Every response must begin with [think] |
| | - After [/think], include your tool call |
| | - No text outside of [think]...[/think] and <tool_call>...</tool_call> |
| | </format_rules> |
| | |
| | Tool format: |
| | <tool_call> |
| | <name>tool_name</name> |
| | <param name="arg_name">value</param> |
| | </tool_call> |
| | |
| | When done, output ONLY the final hex string with no tool calls. |
| | |
| | Example: |
| | [think]I need to get the content first.[/think] |
| | <tool_call> |
| | <name>get_tag_content</name> |
| | </tool_call>""" } |
| | |
| | [trainer.model] |
| | ac = { freq = 1 } |
| | |
| | [trainer.optim] |
| | lr = 1e-6 |
| | max_norm = 0.001 |
| | |
| | [trainer.scheduler] |
| | type = "linear" |
| | warmup_steps = 30 |
| | decay_steps = 30 |
| | min_lr = 0 |
| | |
| | [inference.parallel] |
| | tp = 4 |
| | |
| | [ckpt] |
| | ``` |
| |
|