haochengsama's picture
Add files using upload-large-folder tool
97cb846 verified
Raw
History Blame Contribute Delete
3.7 kB
"""`pattern_matching` task generator."""
import json
import random
import shutil
from collections import defaultdict
from pathlib import Path
from ..base import (
Generator, _VERIFY_HEADER, _render_verify, pad_to, _write, _para,
_AUTHORS, _SONG_TITLES, _STUDENTS, _WORDS,
)
class PatternMatching(Generator):
    """Generator for the `pattern_matching` task.

    The task asks an agent to scan the `.txt` files in a directory, find
    every line containing ``KEYWORD`` (case-sensitive substring match), and
    write ``<filename>:<line_number>:<full_line_text>`` records, sorted by
    filename and then line number, to ``OUT``.

    ``build`` creates the input files, ``description`` renders the prompt,
    ``verify_src`` renders a checker script, and ``solve`` writes the
    reference answer.
    """

    KEY = "pattern_matching"
    CATEGORY_NAME = "Pattern Matching"
    DIFFICULTY = "L2"
    TAGS = ["file content", "search"]
    # Substring the agent must search for; matched case-sensitively.
    KEYWORD = "URGENT"
    # Name of the results file; it is excluded from the scan itself.
    OUT = "matches.txt"

    def build(self, env_dir, llm, rng) -> dict:
        """Create the input `.txt` files and return the task spec.

        Args:
            env_dir: Directory the files are written into (joined with ``/``,
                so a ``pathlib.Path``-like object is expected).
            llm: Object providing ``gen_snippets(topic, count)``; each
                returned item must have a ``"filename"`` key.
            rng: Random source providing ``randint`` and ``choice``.

        Returns:
            Dict with ``"files"`` (written file names), ``"keyword"`` and
            ``"out"``.
        """
        snippets = llm.gen_snippets("project status logs", 5)
        files = []
        for s in snippets:
            name = s["filename"] + ".txt"
            n = rng.randint(4, 7)
            inject = []
            # One or two keyword lines per file. The index is drawn before
            # the words so the RNG call sequence stays stable.
            for _ in range(rng.randint(1, 2)):
                idx = rng.randint(0, n - 1)
                sentence = " ".join(
                    rng.choice(_WORDS) for _ in range(5)).capitalize()
                inject.append((idx, f"{self.KEYWORD}: {sentence}."))
            # NOTE(review): presumably _para yields `n` filler lines with the
            # injected lines placed at their indices -- confirm in base._para,
            # including how two injections at the same index are handled.
            lines = _para(rng, n, inject)
            _write(env_dir / name, "\n".join(lines) + "\n")
            files.append(name)
        return {"files": files, "keyword": self.KEYWORD, "out": self.OUT}

    def description(self, spec) -> str:
        """Return the natural-language task prompt.

        ``spec`` is accepted for interface compatibility but not read; the
        text is built from the class-level ``KEYWORD`` and ``OUT``.
        """
        return (
            "Please use FileSystem tools to finish the following task:\n\n"
            f"### Task: Find lines containing `{self.KEYWORD}`\n\n"
            "Scan every `.txt` file in the test directory. For each line that "
            f"contains the exact (case-sensitive) substring `{self.KEYWORD}`, record "
            "a result line in this format:\n\n"
            "```\n<filename>:<line_number>:<full_line_text>\n```\n\n"
            "- `line_number` is 1-based.\n"
            "- `full_line_text` is the matching line with trailing newline removed.\n"
            "- Sort all result lines by filename, then by line number.\n"
            f"- Write the results to `{self.OUT}` in the test directory root "
            f"(do not include `{self.OUT}` itself in the scan)."
        )

    def verify_src(self, spec):
        """Render the verifier script for this task.

        The script recomputes the expected records from the `.txt` files in
        the test directory and compares them, after stripping surrounding
        whitespace, with the contents of the output file.

        Args:
            spec: Task spec from ``build``; ``"keyword"`` and ``"out"`` are
                passed to the script as the ``KEYWORD`` and ``OUT`` constants.

        Returns:
            Whatever ``_render_verify`` produces for the body and constants.
        """
        # NOTE(review): `json`, `sys`, `get_test_dir`, `fail`, `ok` and the
        # `__CONSTS__` placeholder are not defined in this body; presumably
        # _render_verify supplies them -- confirm in base.
        # The body is kept at column 0 so it is valid top-level code whether
        # or not the renderer dedents it.
        body = '''
C = json.loads(__CONSTS__)


def expected(t):
    rows = []
    for f in sorted(t.iterdir()):
        if not (f.is_file() and f.suffix == ".txt" and f.name != C["OUT"]):
            continue
        for i, line in enumerate(f.read_text(encoding="utf-8").splitlines(), 1):
            if C["KEYWORD"] in line:
                rows.append((f.name, i, line))
    rows.sort(key=lambda r: (r[0], r[1]))
    return "\\n".join(f"{n}:{i}:{l}" for n, i, l in rows)


def main():
    t = get_test_dir()
    out = t / C["OUT"]
    if not out.is_file():
        fail(f"output file not found: {C['OUT']}")
    got = out.read_text(encoding="utf-8").strip()
    exp = expected(t).strip()
    if got != exp:
        print("--- expected ---")
        print(exp[:800])
        print("--- got ---")
        print(got[:800])
        fail(f"{C['OUT']} does not match expected results")
    ok("all matching lines correctly recorded")
    print("\\U0001f389 All checks passed!")
    sys.exit(0)


if __name__ == "__main__":
    main()
'''
        return _render_verify(body, {"KEYWORD": spec["keyword"], "OUT": spec["out"]})

    def solve(self, work_dir, spec) -> None:
        """Write the reference solution into ``work_dir``.

        Scans the `.txt` files directly inside ``work_dir`` (skipping the
        output file), collects every line containing ``spec["keyword"]``, and
        writes the sorted ``name:line_number:text`` records to ``spec["out"]``.

        Args:
            work_dir: Directory to scan; must support ``iterdir()`` and ``/``.
            spec: Task spec from ``build``; reads ``"keyword"`` and ``"out"``.
        """
        keyword = spec["keyword"]
        out_name = spec["out"]
        rows = []
        for f in sorted(work_dir.iterdir()):
            if not (f.is_file() and f.suffix == ".txt" and f.name != out_name):
                continue
            text = f.read_text(encoding="utf-8")
            for i, line in enumerate(text.splitlines(), 1):
                if keyword in line:
                    rows.append((f.name, i, line))
        # Explicitly order by (file name, line number) as the task requires,
        # rather than relying on the path ordering of the directory listing.
        rows.sort(key=lambda r: (r[0], r[1]))
        _write(work_dir / out_name,
               "\n".join(f"{n}:{i}:{l}" for n, i, l in rows) + "\n")