| """`pattern_matching` task generator.""" |
| import json |
| import random |
| import shutil |
| from collections import defaultdict |
| from pathlib import Path |
|
|
| from ..base import ( |
| Generator, _VERIFY_HEADER, _render_verify, pad_to, _write, _para, |
| _AUTHORS, _SONG_TITLES, _STUDENTS, _WORDS, |
| ) |
|
|
|
|
class PatternMatching(Generator):
    """Task generator: find every line containing a keyword across `.txt` files.

    `build` writes a handful of text files into the environment, some lines of
    which carry the keyword; `description` renders the task prompt;
    `verify_src` renders a verifier script; `solve` writes the reference
    answer.
    """

    # Registry / metadata values for this task type.
    KEY = "pattern_matching"
    CATEGORY_NAME = "Pattern Matching"
    DIFFICULTY = "L2"
    TAGS = ["file content", "search"]

    # Substring that marks a line as a match (compared case-sensitively).
    KEYWORD = "URGENT"
    # Name of the results file the solver must create in the test directory.
    OUT = "matches.txt"

    def build(self, env_dir, llm, rng):
        """Populate ``env_dir`` with text files containing keyword lines.

        Args:
            env_dir: Directory the files are written into (joined with ``/``).
            llm: Object exposing ``gen_snippets(topic, count)``; each returned
                item is indexed by ``"filename"``.
            rng: Random source providing ``randint`` and ``choice``.

        Returns:
            Spec dict with the written file names (``"files"``), the keyword
            (``"keyword"``) and the output file name (``"out"``).
        """
        snippets = llm.gen_snippets("project status logs", 5)
        files = []
        # Names already claimed. Seeded with OUT so that no input file can
        # share the name of the results file, which the scan excludes and the
        # solver overwrites.
        taken = {self.OUT}
        for s in snippets:
            stem = s["filename"]
            name = f"{stem}.txt"
            # Snippet names are not guaranteed unique; without this a repeated
            # name would overwrite the earlier file and be listed twice.
            suffix = 2
            while name in taken:
                name = f"{stem}_{suffix}.txt"
                suffix += 1
            taken.add(name)

            n = rng.randint(4, 7)
            inject = []
            # NOTE(review): two injections may draw the same index; how _para
            # treats a repeated index is not visible here -- confirm.
            for _ in range(rng.randint(1, 2)):
                idx = rng.randint(0, n - 1)
                sentence = " ".join(
                    rng.choice(_WORDS) for _ in range(5)).capitalize()
                inject.append((idx, f"{self.KEYWORD}: " + sentence + "."))
            # Presumably _para yields the file's lines with the injected
            # entries placed at their indices -- verify against ..base.
            lines = _para(rng, n, inject)
            _write(env_dir / name, "\n".join(lines) + "\n")
            files.append(name)
        return {"files": files, "keyword": self.KEYWORD, "out": self.OUT}

    def description(self, spec):
        """Return the task prompt shown to the solver.

        ``spec`` is accepted for interface compatibility but not read; the
        prompt is built from the class-level ``KEYWORD`` and ``OUT``.
        """
        return (
            "Please use FileSystem tools to finish the following task:\n\n"
            f"### Task: Find lines containing `{self.KEYWORD}`\n\n"
            "Scan every `.txt` file in the test directory. For each line that "
            f"contains the exact (case-sensitive) substring `{self.KEYWORD}`, record "
            "a result line in this format:\n\n"
            "```\n<filename>:<line_number>:<full_line_text>\n```\n\n"
            "- `line_number` is 1-based.\n"
            "- `full_line_text` is the matching line with trailing newline removed.\n"
            "- Sort all result lines by filename, then by line number.\n"
            f"- Write the results to `{self.OUT}` in the test directory root "
            f"(do not include `{self.OUT}` itself in the scan)."
        )

    def verify_src(self, spec):
        """Return the source of the verifier script for this task instance.

        The embedded script recomputes the expected result lines from the
        `.txt` files in the test directory and compares them, after stripping
        surrounding whitespace, with the contents of the output file.

        The script body relies on names it does not define itself (``json``,
        ``sys``, ``get_test_dir``, ``fail``, ``ok``) and on a ``__CONSTS__``
        placeholder; presumably ``_render_verify`` supplies these -- confirm
        against ``..base``.
        """
        # This is a regular (non-raw) string: "\\n" and "\\U0001f389" become
        # "\n" and "\U0001f389" escapes in the generated script's source.
        body = '''
C = json.loads(__CONSTS__)


def expected(t):
    rows = []
    for f in sorted(t.iterdir()):
        if not (f.is_file() and f.suffix == ".txt" and f.name != C["OUT"]):
            continue
        for i, line in enumerate(f.read_text(encoding="utf-8").splitlines(), 1):
            if C["KEYWORD"] in line:
                rows.append((f.name, i, line))
    rows.sort(key=lambda r: (r[0], r[1]))
    return "\\n".join(f"{n}:{i}:{l}" for n, i, l in rows)


def main():
    t = get_test_dir()
    out = t / C["OUT"]
    if not out.is_file():
        fail(f"output file not found: {C['OUT']}")
    got = out.read_text(encoding="utf-8").strip()
    exp = expected(t).strip()
    if got != exp:
        print("--- expected ---"); print(exp[:800])
        print("--- got ---"); print(got[:800])
        fail(f"{C['OUT']} does not match expected results")
    ok("all matching lines correctly recorded")
    print("\\U0001f389 All checks passed!")
    sys.exit(0)


if __name__ == "__main__":
    main()
'''
        return _render_verify(body, {"KEYWORD": spec["keyword"], "OUT": spec["out"]})

    def solve(self, work_dir, spec):
        """Write the reference answer into ``work_dir``.

        Scans every regular `.txt` file directly inside ``work_dir`` except
        the output file, collects ``(filename, 1-based line number, line)``
        for each line containing ``spec["keyword"]``, and writes them as
        ``<filename>:<line_number>:<line>`` rows sorted by filename and then
        line number to ``spec["out"]``.
        """
        rows = []
        for f in sorted(work_dir.iterdir()):
            # Only regular .txt files count, and never the results file itself.
            if not (f.is_file() and f.suffix == ".txt" and f.name != spec["out"]):
                continue
            for i, line in enumerate(f.read_text(encoding="utf-8").splitlines(), 1):
                if spec["keyword"] in line:
                    rows.append((f.name, i, line))
        rows.sort(key=lambda r: (r[0], r[1]))
        _write(work_dir / spec["out"], "\n".join(f"{n}:{i}:{l}" for n, i, l in rows) + "\n")
|
|