#!/usr/bin/env python3 """Assemble the quest-classification SFT dataset from verified teacher labels. Inputs: data/quest_labels/labeled.json - verified matches per project (from the Workflow) data/quest_labels/in/.json - the exact README / APP_FILE segments shown to the labeller Builds one natural example per project plus targeted augmentations so every case the prompt must handle is represented: app-only signal, readme-only signal, a missing app file, README/app contradictions, empty matches, and noisy metadata. Writes data/quest_sft.jsonl (manifest + examples) and prints a coverage report. """ from __future__ import annotations import argparse import json from pathlib import Path import re import sys ROOT = Path(__file__).resolve().parents[1] sys.path.insert(0, str(ROOT)) from hackathon_advisor.quest_dataset import build_dataset_jsonl, build_example, parse_quest_dataset_jsonl from hackathon_advisor.quest_taxonomy import normalize_match, render_quest_prompt NO_README = "(no README description provided)" NO_APP = "(no app file available)" IN_DIR = ROOT / "data" / "quest_labels" / "in" def load_input(slug: str) -> dict: return json.loads((IN_DIR / f"{slug}.json").read_text(encoding="utf-8")) def prompt_for(meta: dict, readme: str, app: str) -> str: return render_quest_prompt( title=meta.get("title", ""), sdk=meta.get("sdk", ""), declared_models=meta.get("declared_models", []), tags=meta.get("tags", []), readme_segment=readme, app_file_name=meta.get("app_file", ""), app_file_segment=app, ) def example(meta: dict, readme: str, app: str, matches: list[dict], *, variant: str) -> dict: return build_example( prompt_for(meta, readme, app), [normalize_match(m) for m in matches], meta={"kind": "quest_classification", "project_id": meta.get("id", ""), "variant": variant}, ) # --- synthetic README/app contradictions: README screams "local/offline" but the app # clearly calls a proprietary cloud API, so Off the Grid must NOT be awarded. 
--- CONTRADICTIONS = [ { "id": "synthetic/contradiction-1", "title": "PocketScribe — fully local notes", "declared_models": [], "tags": ["gradio"], "app_file": "app.py", "readme": "# PocketScribe\nPocketScribe is a 100% offline, fully local note-taking assistant. " "No API keys, no cloud, runs entirely on your own laptop for total privacy.", "app": "import gradio as gr\nfrom openai import OpenAI\nclient = OpenAI()\n\n" "def summarize(note):\n r = client.chat.completions.create(model='gpt-4o-mini', " "messages=[{'role':'user','content':note}])\n return r.choices[0].message.content\n\n" "gr.Interface(summarize, 'text', 'text').launch()", "matches": [ {"quest": "Backyard AI", "confidence": 0.55, "evidence": "personal note-taking assistant", "source": "readme"}, ], }, { "id": "synthetic/contradiction-2", "title": "HomeVet offline pet advisor", "declared_models": [], "tags": ["gradio", "pets"], "app_file": "app.py", "readme": "# HomeVet\nAn offline, local-first pet-care helper for my own dog. Works without the " "internet and keeps everything on-device. Built for a real person: my family.", "app": "import gradio as gr\nimport anthropic\nclient = anthropic.Anthropic()\n\n" "def advise(symptom):\n msg = client.messages.create(model='claude-3-5-sonnet-20241022', " "max_tokens=300, messages=[{'role':'user','content':symptom}])\n return msg.content[0].text\n\n" "with gr.Blocks() as demo:\n gr.Markdown('# HomeVet')\n inp = gr.Textbox()\n out = gr.Textbox()\n" " gr.Button('Ask').click(advise, inp, out)\ndemo.launch()", "matches": [ {"quest": "Backyard AI", "confidence": 0.7, "evidence": "pet-care helper for my own dog", "source": "readme"}, ], }, { "id": "synthetic/contradiction-3", "title": "GridFree storyteller", "declared_models": [], "tags": ["gradio", "story"], "app_file": "app.py", "readme": "# GridFree\nA delightful local, no-cloud bedtime-story generator. 
Runs off the grid, " "no proprietary APIs, entirely on your machine.", "app": "import gradio as gr, requests, os\n\nAPI='https://api.openai.com/v1/chat/completions'\n" "def story(theme):\n r=requests.post(API, headers={'Authorization':'Bearer '+os.environ['OPENAI_API_KEY']}," " json={'model':'gpt-4o','messages':[{'role':'user','content':theme}]})\n return r.json()\n\n" "gr.Interface(story,'text','text', css='.gradio-container{background:#102}').launch()", "matches": [ {"quest": "Thousand Token Wood", "confidence": 0.6, "evidence": "bedtime-story generator", "source": "readme"}, {"quest": "Off-Brand", "confidence": 0.5, "evidence": "custom css background styling", "source": "app_file"}, ], }, { "id": "synthetic/contradiction-4", "title": "LocalLlama claim vs Gemini app", "declared_models": [], "tags": ["gradio"], "app_file": "app.py", "readme": "# QuietDesk\nRuns llama.cpp locally with GGUF weights — completely offline, your data never leaves " "the device. A calm local-first desktop assistant.", "app": "import gradio as gr\nimport google.generativeai as genai\ngenai.configure(api_key='...')\n" "model = genai.GenerativeModel('gemini-1.5-flash')\n\n" "def reply(q):\n return model.generate_content(q).text\n\n" "gr.ChatInterface(reply).launch()", "matches": [], }, { "id": "synthetic/contradiction-5", "title": "Edge claim, cohere app", "declared_models": ["CohereForAI/command-r"], "tags": ["gradio"], "app_file": "app.py", "readme": "# EdgeMind\nEdgeMind is an on-device, fully local agent. No external services. 
Includes a write-up of " "every build decision in our field notes below.\n## Field Notes\nDay 1: chose a tiny model...", "app": "import gradio as gr, cohere\nco = cohere.Client('KEY')\n\n" "def run(q):\n return co.chat(message=q, model='command-r').text\n\n" "gr.Interface(run,'text','text').launch()", "matches": [ {"quest": "Field Notes", "confidence": 0.7, "evidence": "write-up of every build decision", "source": "readme"}, ], }, { "id": "synthetic/contradiction-6", "title": "README understates a clearly local app", "declared_models": ["openbmb/MiniCPM5-1B"], "tags": ["gradio"], "app_file": "app.py", "readme": "# Helper\nA small helper app. (No further description.)", "app": "import gradio as gr\nfrom llama_cpp import Llama\n" "llm = Llama.from_pretrained('openbmb/MiniCPM5-1B-GGUF', filename='*Q4_K_M.gguf')\n\n" "def chat(m):\n return llm.create_chat_completion(messages=[{'role':'user','content':m}])\n\n" "gr.Interface(chat,'text','text').launch()", "matches": [ {"quest": "Off the Grid", "confidence": 0.85, "evidence": "local llama_cpp GGUF inference", "source": "app_file"}, {"quest": "Llama Champion", "confidence": 0.9, "evidence": "from llama_cpp import Llama", "source": "app_file"}, {"quest": "OpenBMB", "confidence": 0.85, "evidence": "openbmb/MiniCPM5-1B-GGUF", "source": "app_file"}, {"quest": "Tiny Titan", "confidence": 0.75, "evidence": "MiniCPM5-1B is ~1B params", "source": "app_file"}, ], }, ] # A couple of fully-empty-signal samples beyond whatever empties occur naturally. 
EMPTY_SAMPLES = [
    {
        "id": "synthetic/empty-1",
        "title": "My Build Small Hackathon",
        "declared_models": [],
        "tags": ["gradio", "region:us"],
        "app_file": "app.py",
        "readme": "Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference",
        "app": "import gradio as gr\n\ndef greet(name):\n return 'Hello ' + name\n\n"
            "gr.Interface(fn=greet, inputs='text', outputs='text').launch()",
    },
    {
        "id": "synthetic/empty-2",
        "title": "todo",
        "declared_models": [],
        "tags": ["gradio"],
        "app_file": "",
        "readme": "todo",
        "app": NO_APP,
    },
]

# Real projects (kept in the corpus) whose app calls a REMOTE inference endpoint.
# Their teacher labels already exclude Off the Grid; app-only variants force the model
# to judge the remote-inference app directly instead of leaning on its strong prior.
REMOTE_INFERENCE_SLUGS = [
    "GTROX",
    "ai-study-buddy",
    "come-and-compare",
    "AI-agent-Evaluation-pipeline",
    "Sprout-And-Spoon",
    "The-Shrine",
    "Backyard-Demo-Builder",
    "persona-atlas",
    "Structured-Data-Rescuer",
    "nutrilens",
    "ux-crime-scene",
    "wpl-discovery",
    "legawa",
    "business-order-assistant",
    "cloud-parade-cabinet",
    "gitopadesh",
]

# Hand-authored contrastive hard negatives for two observed failure modes:
#   (1) a REMOTE inference call (InferenceClient / endpoints / replicate / *.modal.run)
#       must NOT earn Off the Grid, whatever model it names;
#   (2) OpenBMB belongs only to openbmb/ models and Tiny Titan only to <=4B models,
#       so a non-openbmb / large model id must not trigger them. Positive anchors keep
#       the model from over-correcting on genuinely local openbmb / small models.
HARD_NEGATIVES = [
    {
        "id": "synthetic/remote-gptoss-empty",
        "title": "Chat Demo",
        "declared_models": [],
        "tags": ["gradio"],
        "app_file": "app.py",
        "readme": "# Chat Demo\nA simple chat space.",
        "app": "import gradio as gr\nfrom huggingface_hub import InferenceClient\n"
            "client = InferenceClient(model=\"openai/gpt-oss-20b\")\n\n"
            "def respond(m, history):\n return client.chat_completion(m).choices[0].message.content\n\n"
            "gr.ChatInterface(respond).launch()",
        "matches": [],
    },
    {
        "id": "synthetic/remote-qwen-offbrand",
        "title": "NeonChat",
        "declared_models": [],
        "tags": ["gradio"],
        "app_file": "app.py",
        "readme": "# NeonChat\nA chat UI with a neon theme.",
        "app": "import gradio as gr\nfrom huggingface_hub import InferenceClient\n"
            "client = InferenceClient(model=\"Qwen/Qwen2.5-72B-Instruct\")\n"
            "CUSTOM_CSS = '.gradio-container{background:#0a0a14} .msg{box-shadow:0 0 12px #0ff}'\n\n"
            "def reply(m, h):\n return client.chat_completion(m).choices[0].message.content\n\n"
            "demo = gr.Blocks(css=CUSTOM_CSS)\n",
        "matches": [
            {"quest": "Off-Brand", "confidence": 0.78, "evidence": "gr.Blocks(css=CUSTOM_CSS) neon custom styling", "source": "app_file"},
        ],
    },
    {
        "id": "synthetic/remote-endpoint-backyard",
        "title": "PillReader",
        "declared_models": [],
        "tags": ["gradio"],
        "app_file": "app.py",
        "readme": "# PillReader\nHelps my grandmother read the small print on her medication labels and "
            "set reminders, so she can manage her prescriptions without calling me every day.",
        "app": "import requests, gradio as gr\n"
            "ENDPOINT = \"https://abc123.endpoints.huggingface.cloud\"\n\n"
            "def read(image):\n return requests.post(ENDPOINT, files={'image': image}).json()['text']\n\n"
            "gr.Interface(read, 'image', 'text').launch()",
        "matches": [
            {"quest": "Backyard AI", "confidence": 0.85, "evidence": "helps my grandmother read medication labels", "source": "readme"},
        ],
    },
    {
        "id": "synthetic/remote-replicate-ttw",
        "title": "DreamPostcards",
        "declared_models": [],
        "tags": ["gradio"],
        "app_file": "app.py",
        "readme": "# DreamPostcards\nA whimsical generator that turns a sentence about your day into a "
            "dreamy illustrated postcard from an imaginary seaside town.",
        "app": "import replicate, gradio as gr\n\n"
            "def make(prompt):\n return replicate.run('black-forest-labs/flux-schnell', input={'prompt': prompt})\n\n"
            "gr.Interface(make, 'text', 'image').launch()",
        "matches": [
            {"quest": "Thousand Token Wood", "confidence": 0.8, "evidence": "dreamy illustrated postcard generator", "source": "readme"},
        ],
    },
    {
        "id": "synthetic/remote-together-empty",
        "title": "AskAnything",
        "declared_models": [],
        "tags": ["gradio"],
        "app_file": "app.py",
        "readme": "# AskAnything\nAsk a question.",
        "app": "import gradio as gr\nfrom together import Together\nclient = Together()\n\n"
            "def ask(q):\n return client.chat.completions.create(model='openai/gpt-oss-120b', "
            "messages=[{'role':'user','content':q}]).choices[0].message.content\n\n"
            "gr.Interface(ask, 'text', 'text').launch()",
        "matches": [],
    },
    {
        "id": "synthetic/remote-modalrun-modal",
        "title": "FastSummarizer",
        "declared_models": [],
        "tags": ["gradio"],
        "app_file": "app.py",
        "readme": "# FastSummarizer\nSummarizes long text. The model is served on Modal.",
        "app": "import requests, gradio as gr\n"
            "MODAL_URL = \"https://myorg--summarizer-serve.modal.run\"\n\n"
            "def summarize(text):\n return requests.post(MODAL_URL, json={'text': text}).json()['summary']\n\n"
            "gr.Interface(summarize, 'text', 'text').launch()",
        "matches": [
            {"quest": "Modal", "confidence": 0.85, "evidence": "model served at *.modal.run endpoint", "source": "app_file"},
        ],
    },
    {
        "id": "synthetic/remote-gradioclient-empty",
        "title": "Proxy Chat",
        "declared_models": [],
        "tags": ["gradio"],
        "app_file": "app.py",
        "readme": "# Proxy Chat\nChat front-end.",
        "app": "import gradio as gr\nfrom gradio_client import Client\n"
            "client = Client(\"someorg/big-llm-space\")\n\n"
            "def chat(m):\n return client.predict(m, api_name='/chat')\n\n"
            "gr.Interface(chat, 'text', 'text').launch()",
        "matches": [],
    },
    {
        "id": "synthetic/remote-openrouter-empty",
        "title": "RouterBot",
        "declared_models": [],
        "tags": ["gradio"],
        "app_file": "app.py",
        "readme": "# RouterBot\nA chatbot.",
        "app": "import gradio as gr\nfrom openai import OpenAI\n"
            "client = OpenAI(base_url='https://openrouter.ai/api/v1', api_key='...')\n\n"
            "def reply(m):\n return client.chat.completions.create(model='meta-llama/llama-3.1-8b', "
            "messages=[{'role':'user','content':m}]).choices[0].message.content\n\n"
            "gr.Interface(reply, 'text', 'text').launch()",
        "matches": [],
    },
    {
        "id": "synthetic/local-gptoss20b",
        "title": "LocalGPTOSS",
        "declared_models": ["openai/gpt-oss-20b"],
        "tags": ["gradio"],
        "app_file": "app.py",
        "readme": "# LocalGPTOSS\nRuns gpt-oss locally.",
        "app": "import gradio as gr\nimport torch\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n"
            "model = AutoModelForCausalLM.from_pretrained(\"openai/gpt-oss-20b\", torch_dtype='auto', device_map='cuda')\n"
            "tok = AutoTokenizer.from_pretrained(\"openai/gpt-oss-20b\")\n\n"
            "def gen(p):\n ids = tok(p, return_tensors='pt').to('cuda')\n return tok.decode(model.generate(**ids)[0])\n\n"
            "gr.Interface(gen, 'text', 'text').launch()",
        "matches": [
            {"quest": "Off the Grid", "confidence": 0.88, "evidence": "AutoModelForCausalLM.from_pretrained, in-process, no remote call", "source": "app_file"},
        ],
    },
    {
        "id": "synthetic/local-qwen7b",
        "title": "Qwen7B Helper",
        "declared_models": ["Qwen/Qwen2.5-7B-Instruct"],
        "tags": ["gradio"],
        "app_file": "app.py",
        "readme": "# Qwen7B Helper\nA local assistant.",
        "app": "import gradio as gr\nfrom transformers import pipeline\n"
            "pipe = pipeline('text-generation', model=\"Qwen/Qwen2.5-7B-Instruct\", device_map='auto')\n\n"
            "def run(p):\n return pipe(p)[0]['generated_text']\n\n"
            "gr.Interface(run, 'text', 'text').launch()",
        "matches": [
            {"quest": "Off the Grid", "confidence": 0.85, "evidence": "local transformers pipeline, no remote inference", "source": "app_file"},
        ],
    },
    {
        "id": "synthetic/local-llamacpp-qwen",
        "title": "Pocket Qwen",
        "declared_models": ["Qwen/Qwen2.5-7B-Instruct-GGUF"],
        "tags": ["gradio"],
        "app_file": "app.py",
        "readme": "# Pocket Qwen\nRuns a GGUF model on your laptop.",
        "app": "import gradio as gr\nfrom llama_cpp import Llama\n"
            "llm = Llama.from_pretrained(\"Qwen/Qwen2.5-7B-Instruct-GGUF\", filename=\"*Q4_K_M.gguf\")\n\n"
            "def chat(m):\n return llm.create_chat_completion(messages=[{'role':'user','content':m}])\n\n"
            "gr.Interface(chat, 'text', 'text').launch()",
        "matches": [
            {"quest": "Llama Champion", "confidence": 0.95, "evidence": "from llama_cpp import Llama GGUF weights", "source": "app_file"},
            {"quest": "Off the Grid", "confidence": 0.88, "evidence": "local llama_cpp GGUF inference, no remote call", "source": "app_file"},
        ],
    },
    {
        "id": "synthetic/local-llama3b-tiny",
        "title": "Tiny Llama Buddy",
        "declared_models": ["meta-llama/Llama-3.2-3B-Instruct"],
        "tags": ["gradio"],
        "app_file": "app.py",
        "readme": "# Tiny Llama Buddy\nA small local helper.",
        "app": "import gradio as gr\nfrom transformers import AutoModelForCausalLM\n"
            "model = AutoModelForCausalLM.from_pretrained(\"meta-llama/Llama-3.2-3B-Instruct\", device_map='cuda')\n\n"
            "def gen(p):\n return model_generate(p)\n\n"
            "gr.Interface(gen, 'text', 'text').launch()",
        "matches": [
            {"quest": "Off the Grid", "confidence": 0.85, "evidence": "local from_pretrained, in-process inference", "source": "app_file"},
            {"quest": "Tiny Titan", "confidence": 0.82, "evidence": "Llama-3.2-3B is a 3B model", "source": "app_file"},
        ],
    },
    {
        "id": "synthetic/local-openbmb-positive",
        "title": "Pocket MiniCPM",
        "declared_models": ["openbmb/MiniCPM5-1B-GGUF"],
        "tags": ["gradio"],
        "app_file": "app.py",
        "readme": "# Pocket MiniCPM\nRuns MiniCPM locally via llama.cpp.",
        "app": "import gradio as gr\nfrom llama_cpp import Llama\n"
            "llm = Llama.from_pretrained(\"openbmb/MiniCPM5-1B-GGUF\", filename=\"*Q4_K_M.gguf\")\n\n"
            "def chat(m):\n return llm.create_chat_completion(messages=[{'role':'user','content':m}])\n\n"
            "gr.Interface(chat, 'text', 'text').launch()",
        "matches": [
            {"quest": "Llama Champion", "confidence": 0.95, "evidence": "from llama_cpp import Llama", "source": "app_file"},
            {"quest": "OpenBMB", "confidence": 0.95, "evidence": "openbmb/MiniCPM5-1B-GGUF model", "source": "app_file"},
            {"quest": "Off the Grid", "confidence": 0.9, "evidence": "local llama_cpp GGUF, no remote call", "source": "app_file"},
            {"quest": "Tiny Titan", "confidence": 0.82, "evidence": "MiniCPM5-1B is a 1B model", "source": "app_file"},
        ],
    },
    {
        "id": "synthetic/local-minicpmv-positive",
        "title": "Vision Notes",
        "declared_models": ["openbmb/MiniCPM-V-4_6"],
        "tags": ["gradio"],
        "app_file": "app.py",
        "readme": "# Vision Notes\nReads images with MiniCPM-V locally.",
        "app": "import gradio as gr\nfrom transformers import AutoModel\n"
            "model = AutoModel.from_pretrained(\"openbmb/MiniCPM-V-4_6\", trust_remote_code=True, device_map='cuda')\n\n"
            "def caption(img):\n return model.chat(image=img, msgs=[])\n\n"
            "gr.Interface(caption, 'image', 'text').launch()",
        "matches": [
            {"quest": "OpenBMB", "confidence": 0.95, "evidence": "openbmb/MiniCPM-V-4_6 model", "source": "app_file"},
            {"quest": "Off the Grid", "confidence": 0.88, "evidence": "local AutoModel.from_pretrained, no remote call", "source": "app_file"},
        ],
    },
]

# Case-insensitive markers of a remote inference call in app code.
_REMOTE_RE = re.compile(
    r"InferenceClient|endpoints\.huggingface|\breplicate\b|\btogether\b|openrouter|gradio_client|"
    r"\.modal\.run|api\.openai|api\.anthropic|generativeai|cohere\.Client",
    re.I,
)
# OpenBMB == the openbmb org or its MiniCPM/OpenCPM family (the award is "use their model").
_OPENBMB_RE = re.compile(r"openbmb/|minicpm|opencpm", re.I)


def _check_invariants(examples: list[dict]) -> None:
    """Fail the build on the crisp gold violations behind the GTROX failure modes.

    Two rules are enforced: a remote inference call must not earn Off the Grid,
    and OpenBMB belongs only to openbmb / MiniCPM-family models. (A reliable >4B
    check for Tiny Titan is left to the labeller — parameter counts in code are
    too noisy: 1.7B, commented models, multi-model apps all defeat a regex.)

    Args:
        examples: built examples; ``messages[1]["content"]`` is read as the
            prompt text and ``messages[2]["content"]`` as a JSON object with a
            ``"matches"`` list (assumed to be the user / assistant turns
            produced by ``build_example`` — TODO confirm).

    Raises:
        SystemExit: listing every violation found, one per line.
    """
    problems: list[str] = []
    for e in examples:
        user = e["messages"][1]["content"]
        # Skip the quest list so its prose can't false-positive.
        body = user.split("METADATA:", 1)[-1]
        # Only the app-file segment is checked for remote calls; when the marker
        # is absent the whole body is searched instead.
        app = body.split("[APP_FILE]", 1)[-1]
        quests = {m["quest"] for m in json.loads(e["messages"][2]["content"])["matches"]}
        pid = e.get("project_id", "?")
        if _REMOTE_RE.search(app) and "Off the Grid" in quests:
            problems.append(f"{pid}: remote inference in app but Off the Grid awarded")
        if "OpenBMB" in quests and not _OPENBMB_RE.search(body):
            problems.append(f"{pid}: OpenBMB awarded without an openbmb / MiniCPM model in the content")
    if problems:
        raise SystemExit("invariant violations:\n " + "\n ".join(problems))


def _non_negative_int(value: str) -> int:
    """argparse ``type=`` callable: parse an int and reject negative values.

    The augmentation counts are used as slice bounds (``pool[:count]``); a
    negative count would silently select "all but the last k" instead of
    failing, so it is refused at parse time.
    """
    number = int(value)
    if number < 0:
        raise argparse.ArgumentTypeError(f"expected a non-negative integer, got {value!r}")
    return number


def _rank_by_source(by_slug: dict, source: str) -> list[tuple]:
    """Return ``(slug, meta, matches)`` for projects with a ``source`` match, richest first.

    Projects are ordered by how many of their matches come from ``source``;
    ties keep the insertion order of ``by_slug`` (the sort is stable).
    """
    ranked = [
        (slug, meta, matches)
        for slug, (meta, matches) in by_slug.items()
        if any(m["source"] == source for m in matches)
    ]
    ranked.sort(key=lambda item: -sum(1 for m in item[2] if m["source"] == source))
    return ranked


def main() -> None:
    """Build every example variant, validate the set, write the JSONL and print a coverage report."""
    parser = argparse.ArgumentParser(description="Assemble the quest SFT dataset.")
    parser.add_argument("--labels", default="data/quest_labels/labeled.json", type=Path)
    parser.add_argument("--out", default="data/quest_sft.jsonl", type=Path)
    parser.add_argument("--app-only", type=_non_negative_int, default=16)
    parser.add_argument("--readme-only", type=_non_negative_int, default=16)
    parser.add_argument("--noisy", type=_non_negative_int, default=8)
    args = parser.parse_args()

    labeled = json.loads(args.labels.read_text(encoding="utf-8"))
    # The labels file is either a bare list of rows or an object wrapping them in "results".
    rows = labeled["results"] if isinstance(labeled, dict) else labeled

    examples: list[dict] = []
    counts: dict[str, int] = {}

    def add(ex: dict) -> None:
        examples.append(ex)
        counts[ex["variant"]] = counts.get(ex["variant"], 0) + 1

    # 1) natural example per labeled project
    by_slug: dict[str, tuple[dict, list[dict]]] = {}
    for row in rows:
        slug = row["slug"]
        meta = load_input(slug)
        matches = row.get("matches") or []
        by_slug[slug] = (meta, matches)
        add(example(meta, meta["README"], meta["APP_FILE"], matches, variant="natural"))

    # rank projects by richness of each source for augmentation selection
    app_rich = _rank_by_source(by_slug, "app_file")
    readme_rich = _rank_by_source(by_slug, "readme")

    # 2) app-only: strip README, keep only app_file-sourced matches
    for _, meta, ms in app_rich[: args.app_only]:
        kept = [m for m in ms if m["source"] == "app_file"]
        add(example(meta, NO_README, meta["APP_FILE"], kept, variant="app_only"))

    # 3) readme-only / missing app file: blank the app file, keep only readme-sourced matches
    for _, meta, ms in readme_rich[: args.readme_only]:
        kept = [m for m in ms if m["source"] == "readme"]
        add(example(meta, meta["README"], NO_APP, kept, variant="missing_app_file"))

    # 4) noisy metadata: inject garbled tags + scrambled title, gold unchanged
    noisy_pool = sorted(
        ((s, m, ms) for s, (m, ms) in by_slug.items() if ms),
        key=lambda t: -len(t[2]),
    )
    for _, meta, ms in noisy_pool[: args.noisy]:
        noisy_meta = dict(meta)
        noisy_meta["tags"] = list(meta.get("tags", [])) + ["asdf123", "xx", "region:us", "untitled", "draft"]
        noisy_meta["title"] = (meta.get("title", "") + " ::: TODO copy of template (do not read title)").strip()
        add(example(noisy_meta, meta["README"], meta["APP_FILE"], ms, variant="noisy_metadata"))

    # 5) synthetic contradictions
    for spec in CONTRADICTIONS:
        add(example(spec, spec["readme"], spec["app"], spec["matches"], variant="contradiction"))

    # 6) explicit empties
    for spec in EMPTY_SAMPLES:
        add(example(spec, spec["readme"], spec["app"], [], variant="empty"))

    # 7) app-only variants of the real remote-inference projects (forces judging the
    #    remote app directly; their gold already excludes Off the Grid)
    covered_app_only = {s for s, _, _ in app_rich[: args.app_only]}
    for slug in REMOTE_INFERENCE_SLUGS:
        # Skip slugs that were not labeled or already received an app-only variant in step 2.
        if slug not in by_slug or slug in covered_app_only:
            continue
        meta, ms = by_slug[slug]
        kept = [m for m in ms if m["source"] == "app_file"]
        add(example(meta, NO_README, meta["APP_FILE"], kept, variant="remote_app_only"))

    # 8) hand-authored contrastive hard negatives (remote!=local; org-prefix gates)
    for spec in HARD_NEGATIVES:
        add(example(spec, spec["readme"], spec["app"], spec["matches"], variant="hard_negative"))

    _check_invariants(examples)

    text = build_dataset_jsonl(examples, source_note="build_small_hackathon real projects + targeted augmentations")
    manifest, parsed = parse_quest_dataset_jsonl(text)  # validates the whole file
    # Create the destination directory so a fresh checkout or a custom --out path does not fail.
    args.out.parent.mkdir(parents=True, exist_ok=True)
    args.out.write_text(text, encoding="utf-8")

    print(f"wrote {len(parsed)} examples to {args.out}")
    print("variant counts:", json.dumps(counts, ensure_ascii=False))
    print("empty-match examples:", manifest["empty_match_examples"])
    print("quest positive counts:")
    for quest, n in sorted(manifest["quest_positive_counts"].items(), key=lambda kv: -kv[1]):
        print(f" {n:3d} {quest}")


if __name__ == "__main__":
    main()