kalomaze
/

hex-toolcall-base-v2

Model card Files Files and versions

hex-toolcall-base-v2 / README.md

kalomaze's picture

Upload README.md with huggingface_hub

15fec57 verified 21 days ago

|

history blame contribute delete

1.76 kB

	---
	license: apache-2.0
	base_model: EssentialAI/rnj-1-instruct
	---

	# hex-toolcall-base-v2

	This is a work-in-progress (WIP) checkpoint derived from [EssentialAI/rnj-1-instruct](https://huggingface.co/EssentialAI/rnj-1-instruct).

	This checkpoint serves as a baseline for tool-calling experiments.

	## Training Config

	```toml
	max_steps = 140
	seq_len = 8192

	[model]
	name = "EssentialAI/rnj-1-instruct"

	[wandb]
	project = "xml-tool-thinking"
	name = "hex-v6-bs512-rollouts16"

	[orchestrator.wandb.log_extras]
	samples = true
	interval = 1

	[orchestrator]
	batch_size = 512
	rollouts_per_example = 16

	[orchestrator.sampling]
	max_tokens = 512
	temperature = 1.0

	[[orchestrator.env]]
	id = "hex_encode_xml"
	args = { max_turns = 5, max_chunk = 128, strict_format = true, user_prompt = """Here is a document with semantic XML tags:

	{doc}

	Your task: Encode the content of the <{tag_name}> tag to hexadecimal.

	You must encode in chunks of at most {max_chunk_size} characters at a time.

	Available tools:
	- get_tag_content: Get the target tag's text and length. No arguments.
	- encode_chunk: Encode a character range to hex. Args: start (int), end (int)

	<format_rules>
	- Every response must begin with [think]
	- After [/think], include your tool call
	- No text outside of [think]...[/think] and <tool_call>...</tool_call>
	</format_rules>

	Tool format:
	<tool_call>
	<name>tool_name</name>
	<param name="arg_name">value</param>
	</tool_call>

	When done, output ONLY the final hex string with no tool calls.

	Example:
	[think]I need to get the content first.[/think]
	<tool_call>
	<name>get_tag_content</name>
	</tool_call>""" }

	[trainer.model]
	ac = { freq = 1 }

	[trainer.optim]
	lr = 1e-6
	max_norm = 0.001

	[trainer.scheduler]
	type = "linear"
	warmup_steps = 30
	decay_steps = 30
	min_lr = 0

	[inference.parallel]
	tp = 4

	[ckpt]
	```