"""`pattern_matching` task generator.""" import json import random import shutil from collections import defaultdict from pathlib import Path from ..base import ( Generator, _VERIFY_HEADER, _render_verify, pad_to, _write, _para, _AUTHORS, _SONG_TITLES, _STUDENTS, _WORDS, ) class PatternMatching(Generator): KEY = "pattern_matching" CATEGORY_NAME = "Pattern Matching" DIFFICULTY = "L2" TAGS = ["file content", "search"] KEYWORD = "URGENT" OUT = "matches.txt" def build(self, env_dir, llm, rng): snippets = llm.gen_snippets("project status logs", 5) files = [] for s in snippets: name = s["filename"] + ".txt" n = rng.randint(4, 7) inject = [] for _ in range(rng.randint(1, 2)): idx = rng.randint(0, n - 1) inject.append((idx, f"{self.KEYWORD}: " + " ".join( rng.choice(_WORDS) for _ in range(5)).capitalize() + ".")) lines = _para(rng, n, inject) _write(env_dir / name, "\n".join(lines) + "\n") files.append(name) return {"files": files, "keyword": self.KEYWORD, "out": self.OUT} def description(self, spec): return ( "Please use FileSystem tools to finish the following task:\n\n" f"### Task: Find lines containing `{self.KEYWORD}`\n\n" f"Scan every `.txt` file in the test directory. For each line that " f"contains the exact (case-sensitive) substring `{self.KEYWORD}`, record " f"a result line in this format:\n\n" "```\n::\n```\n\n" "- `line_number` is 1-based.\n" "- `full_line_text` is the matching line with trailing newline removed.\n" f"- Sort all result lines by filename, then by line number.\n" f"- Write the results to `{self.OUT}` in the test directory root " f"(do not include `{self.OUT}` itself in the scan)." ) def verify_src(self, spec): body = ''' C = json.loads(__CONSTS__) def expected(t): rows = [] for f in sorted(t.iterdir()): if not (f.is_file() and f.suffix == ".txt" and f.name != C["OUT"]): continue for i, line in enumerate(f.read_text(encoding="utf-8").splitlines(), 1): if C["KEYWORD"] in line: rows.append((f.name, i, line)) rows.sort(key=lambda r: (r[0], r[1])) return "\\n".join(f"{n}:{i}:{l}" for n, i, l in rows) def main(): t = get_test_dir() out = t / C["OUT"] if not out.is_file(): fail(f"output file not found: {C['OUT']}") got = out.read_text(encoding="utf-8").strip() exp = expected(t).strip() if got != exp: print("--- expected ---"); print(exp[:800]) print("--- got ---"); print(got[:800]) fail("matches.txt does not match expected results") ok("all matching lines correctly recorded") print("\\U0001f389 All checks passed!") sys.exit(0) if __name__ == "__main__": main() ''' return _render_verify(body, {"KEYWORD": spec["keyword"], "OUT": spec["out"]}) def solve(self, work_dir, spec): rows = [] for f in sorted(work_dir.iterdir()): if not (f.is_file() and f.suffix == ".txt" and f.name != spec["out"]): continue for i, line in enumerate(f.read_text(encoding="utf-8").splitlines(), 1): if spec["keyword"] in line: rows.append((f.name, i, line)) rows.sort(key=lambda r: (r[0], r[1])) _write(work_dir / spec["out"], "\n".join(f"{n}:{i}:{l}" for n, i, l in rows) + "\n")