Instructions to use prometheus04/qwen3-4b-thinking-microagent with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use prometheus04/qwen3-4b-thinking-microagent with PEFT:
Task type is invalid.
- Notebooks
- Google Colab
- Kaggle
"""Convert Nemotron-Terminal-Corpus (Terminus 2 format) to MicroAgent format.

Strategy: multi-line <bash> block (Path D).

Filters (strict):
- First turn must be user (system+task)
- All assistant JSONs must parse cleanly
- No "parsing error" observations
- Final assistant turn must have task_complete=true AND zero commands
  (clean "finish" semantics)
- num_turns sanity: 2 <= assistant turns <= 20

Conversion:
- System prompt -> MicroAgent's (with multi-line bash allowed)
- First user turn -> extract task between markers
- Each non-final assistant turn -> <think>...</think>\n<bash>cmds joined with \n</bash>
- Each final assistant turn -> <think>...</think>\n<finish>summary</finish>
- Each observation -> strip "New Terminal Output:" prefix, head+tail truncate

Output: JSONL with one trajectory per line.
"""
| from __future__ import annotations | |
| import json | |
| import os | |
| import re | |
| import sys | |
| from collections import Counter | |
| from dataclasses import dataclass | |
# Default HF_HUB_DISABLE_SYMLINKS_WARNING to "1" without overriding a value
# that is already present in the environment.
os.environ.setdefault("HF_HUB_DISABLE_SYMLINKS_WARNING", "1")

# --- Configuration ---

# System message placed at the head of every converted conversation.
# NOTE(review): blank lines inside this literal may have been lost when the
# file was extracted - confirm against the upstream script.
SYSTEM_PROMPT = """You are a terminal agent. You complete tasks by running bash commands in a Linux container.
Respond in EXACTLY this format every turn:
<think>brief reasoning, max 2 sentences</think>
<bash>shell commands (one or more lines)</bash>
When the task is fully complete and verified, respond instead with:
<think>brief verification reasoning</think>
<finish>one-line summary of what you did</finish>
Rules:
- One bash block per turn. Multiple lines run sequentially in the same shell.
- Multi-line files: cat <<'EOF' > path/to/file ... EOF
- Output is truncated (head + tail). Lines starting with [exit N] mean non-zero exit.
- Analyze errors before retrying. Never repeat a failed command unchanged.
- You have a hard turn limit. Use turns efficiently."""

# Observation truncation: characters kept from the start / end of long output
# (default arguments of truncate_output).
OBS_HEAD_CHARS = 600
OBS_TAIL_CHARS = 600

# Accepted range for the number of assistant turns in one trajectory.
MIN_ASSISTANT_TURNS = 2
MAX_ASSISTANT_TURNS = 20

# Upper bound on the length of the <finish> summary text.
MAX_FINISH_SUMMARY_CHARS = 200

# Literal markers searched for in the first user turn of a source trajectory.
TASK_START_MARKER = "Task Description:"
TASK_END_MARKER = "Current terminal state:"
INITIAL_STATE_MARKER = "Current Terminal Screen:"

# Label removed from the front of each observation.
OBS_PREFIX = "New Terminal Output:"

# Pre-compiled patterns shared by the extraction helpers.
_THINK_RE = re.compile(r"<think>(.*?)</think>", flags=re.DOTALL)
_JSON_RE = re.compile(r"\{.*\}", flags=re.DOTALL)
_WHITESPACE_RE = re.compile(r"\n{3,}")
# --- Reject reason tracking ---
@dataclass
class RejectStats:
    """Counters describing how many trajectories were scanned, kept or rejected.

    Every field starts at zero and is incremented in place by
    ``convert_trajectory`` (and, for ``other``, by ``main``). Being a
    dataclass, each instance owns its own counters and gets a readable
    ``repr`` and value equality.
    """

    # Every trajectory handed to convert_trajectory.
    total: int = 0
    # Trajectories that passed all filters and were converted.
    accepted: int = 0
    # named reject reasons:
    # Fewer than 3 raw turns, or fewer assistant turns than the minimum.
    too_few_turns: int = 0
    # More assistant turns than the maximum.
    too_many_turns: int = 0
    # First raw turn does not have role "user".
    first_not_user: int = 0
    # No non-empty task text could be extracted from the first turn.
    no_task_marker: int = 0
    # A user turn reports a parsing error.
    parse_error_observation: int = 0
    # An assistant turn contains no parseable JSON object.
    assistant_json_unparseable: int = 0
    # Last assistant turn does not set task_complete to true.
    final_not_complete: int = 0
    # Last assistant turn still carries commands.
    final_has_commands: int = 0
    # A non-final assistant turn yields no usable command lines.
    empty_commands_midtrajectory: int = 0
    # Unexpected exception while converting a row.
    other: int = 0
# --- Extraction helpers ---
def extract_think(content: str) -> str:
    """Return the stripped body of the first ``<think>...</think>`` span.

    An empty string is returned when *content* has no such span.
    """
    match = _THINK_RE.search(content)
    if match is None:
        return ""
    return match.group(1).strip()
def extract_json(content: str):
    """Pull the first parseable JSON object out of an assistant turn.

    Any ``<think>...</think>`` spans are removed first. The remaining text is
    scanned left to right; at every ``{`` that opens a top-level brace group
    the JSON decoder is asked to parse an object starting there. The first
    object that decodes is returned, and text following it is ignored.

    Args:
        content: Raw assistant message text.

    Returns:
        The decoded object (a ``dict``), or ``None`` when no top-level brace
        group decodes as JSON.
    """
    text = _THINK_RE.sub("", content).strip()
    decoder = json.JSONDecoder()
    depth = 0
    for pos, ch in enumerate(text):
        if ch == "{":
            if depth == 0:
                # Let the decoder find the end of the object itself, so braces
                # inside JSON strings cannot cut the candidate short.
                try:
                    parsed, _end = decoder.raw_decode(text, pos)
                except (ValueError, RecursionError):
                    pass  # not valid JSON here; keep scanning for the next group
                else:
                    return parsed
            depth += 1
        elif ch == "}" and depth > 0:
            # Unmatched closing braces are ignored: letting depth go negative
            # would stop any later "{" from being treated as top-level.
            depth -= 1
    return None
def extract_task(first_user_content: str) -> str | None:
    """Return the task text found after ``TASK_START_MARKER``.

    The text runs up to the first ``TASK_END_MARKER`` that follows, or to the
    end of the message when that marker is missing. ``None`` is returned when
    the start marker is absent or the extracted text is blank.
    """
    _, start_marker, remainder = first_user_content.partition(TASK_START_MARKER)
    if not start_marker:
        return None
    description = remainder.partition(TASK_END_MARKER)[0].strip()
    return description or None
def extract_initial_state(first_user_content: str) -> str:
    """Return the text after ``INITIAL_STATE_MARKER``, stripped and capped.

    An empty string is returned when the marker is absent. At most the first
    1500 characters of the stripped text are kept.
    """
    _, marker, screen = first_user_content.partition(INITIAL_STATE_MARKER)
    if not marker:
        return ""
    # Bound the size
    return screen.strip()[:1500]
def truncate_output(text: str, head: int = OBS_HEAD_CHARS, tail: int = OBS_TAIL_CHARS) -> str:
    """Shorten long output to its first *head* and last *tail* characters.

    Args:
        text: Output to shorten.
        head: Number of leading characters to keep.
        tail: Number of trailing characters to keep.

    Returns:
        ``"(no output)"`` for empty *text*; *text* unchanged when it is at most
        ``head + tail + 50`` characters long; otherwise the head and the tail
        joined by a marker line stating how many characters were removed.
    """
    if not text:
        return "(no output)"
    # 50 characters of slack: do not truncate when almost nothing would be cut.
    if len(text) <= head + tail + 50:
        return text
    cut = len(text) - head - tail
    # Slice from an explicit start index: text[-tail:] would return the WHOLE
    # text when tail == 0.
    tail_start = len(text) - tail
    return f"{text[:head]}\n[... truncated {cut} chars ...]\n{text[tail_start:]}"
def clean_observation(content: str) -> str:
    """Drop the 'New Terminal Output:' prefix, then head+tail truncate.

    Surrounding whitespace is removed before the prefix check, so the label is
    also dropped when it is preceded by blank lines or spaces. Runs of three or
    more newlines are collapsed to a single blank line before truncation.

    Args:
        content: Raw text of a user (observation) turn.

    Returns:
        The cleaned observation as produced by ``truncate_output``.
    """
    body = content.strip()
    if body.startswith(OBS_PREFIX):
        body = body[len(OBS_PREFIX):].strip()
    # Collapse runs of 3+ newlines
    body = _WHITESPACE_RE.sub("\n\n", body)
    return truncate_output(body)
def keystrokes_to_command(keystrokes: str) -> str:
    """Turn one Terminus 2 keystrokes string into a bash line.

    Trailing carriage-return / line-feed characters (the Enter key presses)
    are removed, because the lines are later joined with newlines and bash
    already treats a newline as a command separator. Everything else,
    including interior newlines and other whitespace, is kept.
    """
    return keystrokes.rstrip("\r\n")
def make_finish_summary(parsed_json: dict) -> str:
    """Compose a one-line finish summary from the JSON's analysis field.

    The first sentence of ``analysis`` is used (text up to the first ``.``,
    ``!`` or ``?`` that is followed by whitespace). Whitespace runs, including
    newlines, are collapsed to single spaces so the result really is one line,
    and the result is capped at ``MAX_FINISH_SUMMARY_CHARS`` characters with a
    trailing ``...`` when cut.

    Args:
        parsed_json: Decoded JSON object of the final assistant turn.

    Returns:
        The summary, or ``"task complete"`` when ``analysis`` is missing or blank.
    """
    analysis = (parsed_json.get("analysis") or "").strip()
    if not analysis:
        return "task complete"
    # Take first sentence-ish
    first = re.split(r"(?<=[.!?])\s", analysis, maxsplit=1)[0]
    # An analysis without sentence punctuation can span several lines; fold it
    # so the <finish> payload stays on one line.
    summary = " ".join(first.split())
    if len(summary) > MAX_FINISH_SUMMARY_CHARS:
        summary = summary[: MAX_FINISH_SUMMARY_CHARS - 3].rstrip() + "..."
    return summary or "task complete"
def build_bash_block(parsed_json: dict) -> str | None:
    """Join the turn's command keystrokes into one multi-line bash script.

    Returns ``None`` when ``commands`` is missing, empty or not a list, when
    any entry is not a dict, or when no entry yields a non-blank line.
    Entries whose ``keystrokes`` value is not a string are skipped.
    """
    entries = parsed_json.get("commands")
    if not entries or not isinstance(entries, list):
        return None

    script_lines: list[str] = []
    for entry in entries:
        if not isinstance(entry, dict):
            return None
        keys = entry.get("keystrokes", "")
        if isinstance(keys, str):
            line = keystrokes_to_command(keys)
            if line.strip():
                script_lines.append(line)

    return "\n".join(script_lines) if script_lines else None
# --- Main converter ---
def convert_trajectory(row: dict, source_config: str, stats: RejectStats) -> dict | None:
    """Convert one source row into a MicroAgent-format trajectory.

    The row is rejected (``None`` returned and the matching ``stats`` counter
    incremented) when any of these holds, checked in this order:

    - fewer than 3 raw turns, or fewer than ``MIN_ASSISTANT_TURNS`` assistant turns
    - the first turn is not a user turn
    - no task text can be extracted from the first turn
    - a later user turn mentions a parsing error
    - more than ``MAX_ASSISTANT_TURNS`` assistant turns
    - an assistant turn has no parseable JSON object
    - the last assistant turn does not have ``task_complete`` set to ``True``
    - the last assistant turn carries any commands
    - a non-final assistant turn yields no command lines

    Args:
        row: Source record; ``row["conversations"]`` is read as a list of
            ``{"role", "content"}`` dicts.
        source_config: Name of the dataset config, copied into the output.
        stats: Counters updated in place.

    Returns:
        A dict with the new ``conversations`` list (system prompt, pinned
        task, then alternating converted turns ending at the ``<finish>``
        turn) plus ``task``, ``episode``, ``run_id``, ``source_config`` and
        ``n_assistant_turns``; or ``None`` when the row is rejected.
    """
    stats.total += 1
    conv = row.get("conversations") or []
    if len(conv) < 3:  # need at least user + assistant + something
        stats.too_few_turns += 1
        return None
    if conv[0].get("role") != "user":
        stats.first_not_user += 1
        return None
    first_content = conv[0]["content"]
    task = extract_task(first_content)
    if not task:
        stats.no_task_marker += 1
        return None
    initial_state = extract_initial_state(first_content)
    other = conv[1:]

    # Quick parse-error check on observations
    for turn in other:
        if turn.get("role") != "user":
            continue
        lowered = turn.get("content", "").lower()
        if "parsing error" in lowered or "no valid json found in response" in lowered:
            stats.parse_error_observation += 1
            return None

    # Parse all assistant JSONs
    assistant_idxs = [i for i, t in enumerate(other) if t.get("role") == "assistant"]
    if len(assistant_idxs) < MIN_ASSISTANT_TURNS:
        stats.too_few_turns += 1
        return None
    if len(assistant_idxs) > MAX_ASSISTANT_TURNS:
        stats.too_many_turns += 1
        return None
    parsed_by_pos = {}
    for i in assistant_idxs:
        parsed = extract_json(other[i]["content"])
        if parsed is None:
            stats.assistant_json_unparseable += 1
            return None
        parsed_by_pos[i] = parsed

    final_assistant_pos = assistant_idxs[-1]
    final_parsed = parsed_by_pos[final_assistant_pos]
    if final_parsed.get("task_complete") is not True:
        stats.final_not_complete += 1
        return None
    if final_parsed.get("commands"):
        # Cleanest semantics: final turn declares done with no commands. Drop the rest.
        # Any truthy value counts, so a malformed non-list "commands" cannot
        # slip through as "no commands".
        stats.final_has_commands += 1
        return None

    # Build new conversation
    pinned = f"TASK:\n{task}"
    if initial_state:
        pinned += f"\n\nInitial state:\n{initial_state}"
    new_conv = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": pinned},
    ]

    for pos, turn in enumerate(other):
        role = turn.get("role")
        if role == "user":
            obs = clean_observation(turn.get("content", ""))
            new_conv.append({"role": "user", "content": obs})
            continue
        if role != "assistant":
            continue  # turns with any other role are not carried over

        think = extract_think(turn.get("content", ""))
        parsed = parsed_by_pos[pos]
        is_final = pos == final_assistant_pos
        if is_final:
            action = f"<finish>{make_finish_summary(parsed)}</finish>"
        else:
            bash_block = build_bash_block(parsed)
            if bash_block is None:
                # Mid-trajectory turn with no commands - reject the trajectory
                stats.empty_commands_midtrajectory += 1
                return None
            action = f"<bash>{bash_block}</bash>"
        content = f"<think>{think}</think>\n{action}" if think else action
        new_conv.append({"role": "assistant", "content": content})
        if is_final:
            # The trajectory ends at <finish>; observations recorded after the
            # final assistant turn must not trail it.
            break

    stats.accepted += 1
    return {
        "conversations": new_conv,
        "task": row.get("task"),
        "episode": row.get("episode"),
        "run_id": row.get("run_id"),
        "source_config": source_config,
        "n_assistant_turns": len(assistant_idxs),
    }
# --- Driver ---
def main(configs, sample_per_config, out_path):
    """Stream each dataset config, convert its rows and write accepted ones as JSONL.

    Args:
        configs: Iterable of config names of ``nvidia/Nemotron-Terminal-Corpus``.
        sample_per_config: Maximum number of rows scanned per config; a falsy
            value (``None`` / ``0``) scans every row.
        out_path: Destination JSONL file; it is overwritten, and its parent
            directory is created when missing.

    A config that fails to load is reported and skipped. A row whose
    conversion raises is counted under ``other`` and its exception type is
    tallied for the final report. Progress and final statistics are printed
    to stdout.
    """
    from datasets import load_dataset

    REPO = "nvidia/Nemotron-Terminal-Corpus"
    stats = RejectStats()
    per_config_accepted = Counter()
    error_types = Counter()  # exception class name -> occurrences

    # open(..., "w") does not create directories; the default output lives in data/.
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    with open(out_path, "w", encoding="utf-8") as out:
        for cfg in configs:
            print(f"\n--- Loading {cfg} (streaming) ---", flush=True)
            try:
                ds = load_dataset(REPO, cfg, streaming=True)
            except Exception as e:
                print(f" could not load {cfg}: {e}")
                continue
            # Use whichever split the config lists first.
            split = list(ds.keys())[0]
            stream = ds[split]
            cfg_count = 0
            for row in stream:
                if sample_per_config and cfg_count >= sample_per_config:
                    break
                cfg_count += 1
                try:
                    converted = convert_trajectory(row, cfg, stats)
                except Exception as exc:
                    # Best effort: one malformed row must not abort the run,
                    # but remember what went wrong.
                    stats.other += 1
                    error_types[type(exc).__name__] += 1
                    continue
                if converted is not None:
                    out.write(json.dumps(converted, ensure_ascii=False) + "\n")
                    per_config_accepted[cfg] += 1
                if cfg_count % 500 == 0:
                    print(
                        f" {cfg}: scanned={cfg_count} accepted={per_config_accepted[cfg]}",
                        flush=True,
                    )
            print(
                f" {cfg}: DONE scanned={cfg_count} accepted={per_config_accepted[cfg]}",
                flush=True,
            )

    print("\n\n========== FINAL STATS ==========")
    print(f"Total scanned : {stats.total}")
    print(f"Total accepted : {stats.accepted}")
    print(f"Output path : {out_path}")
    print("\nReject breakdown:")
    for k in [
        "too_few_turns",
        "too_many_turns",
        "first_not_user",
        "no_task_marker",
        "parse_error_observation",
        "assistant_json_unparseable",
        "final_not_complete",
        "final_has_commands",
        "empty_commands_midtrajectory",
        "other",
    ]:
        v = getattr(stats, k)
        pct = 100.0 * v / max(stats.total, 1)
        print(f" {k:35s} {v:6d} ({pct:5.1f}%)")
    if error_types:
        print("\nUnexpected errors by type:")
        for k, v in error_types.most_common():
            print(f" {k:35s} {v:6d}")
    print("\nPer-config accepted:")
    for k, v in per_config_accepted.most_common():
        print(f" {k:25s} {v}")
if __name__ == "__main__":
    # Command line: [SAMPLE_PER_CONFIG | "all"] [OUT_PATH]
    # Default: sample 200 from each of the working configs
    cli_args = sys.argv[1:]
    configs = ["skill_based_easy", "skill_based_medium", "skill_based_mixed"]
    sample = 200
    out = "data/converted_sample.jsonl"
    if cli_args:
        # "all" lifts the per-config cap; anything else must be an integer.
        sample = None if cli_args[0] == "all" else int(cli_args[0])
    if len(cli_args) > 1:
        out = cli_args[1]
    main(configs, sample, out)