Spaces:

build-small-hackathon
/

hackathon-advisor

Running on Zero

App Files Files Community

hackathon-advisor / scripts /build_quest_sft.py

JacobLinCool

deploy: sync GitHub main de5dbf9

13fe947 verified about 24 hours ago

raw

history blame contribute delete

26.9 kB

	#!/usr/bin/env python3
	"""Assemble the quest-classification SFT dataset from verified teacher labels.

	Inputs:
	data/quest_labels/labeled.json - verified matches per project (from the Workflow)
	data/quest_labels/in/<slug>.json - the exact README / APP_FILE segments shown to the labeller

	Builds one natural example per project plus targeted augmentations so every case the
	prompt must handle is represented: app-only signal, readme-only signal, a missing app
	file, README/app contradictions, empty matches, and noisy metadata. Writes
	data/quest_sft.jsonl (manifest + examples) and prints a coverage report.
	"""
	from __future__ import annotations

	import argparse
	import json
	from pathlib import Path
	import re
	import sys

	ROOT = Path(__file__).resolve().parents[1]
	sys.path.insert(0, str(ROOT))

	from hackathon_advisor.quest_dataset import build_dataset_jsonl, build_example, parse_quest_dataset_jsonl
	from hackathon_advisor.quest_taxonomy import normalize_match, render_quest_prompt

	# Placeholder README segment used when an example deliberately withholds the README
	# (the app-only variants built in main()).
	NO_README = "(no README description provided)"
	# Placeholder app-file segment used when an example has no app file to show
	# (the missing-app-file variants and one synthetic empty sample).
	NO_APP = "(no app file available)"
	# Directory of per-project labeller inputs; load_input() reads <slug>.json from here.
	IN_DIR = ROOT / "data" / "quest_labels" / "in"


	def load_input(slug: str) -> dict:
	    """Read and decode the labeller input record stored for ``slug``.

	    The record lives at ``IN_DIR/<slug>.json`` and is decoded as UTF-8 JSON.
	    """
	    record_path = IN_DIR.joinpath(f"{slug}.json")
	    raw_text = record_path.read_text(encoding="utf-8")
	    return json.loads(raw_text)


	def prompt_for(meta: dict, readme: str, app: str) -> str:
	    """Render the quest-classification prompt for one project.

	    Metadata fields missing from ``meta`` fall back to an empty string (title, sdk,
	    app file name) or an empty list (declared models, tags). ``readme`` and ``app``
	    are passed through as the README and app-file segments.
	    """
	    prompt_fields = {
	        "title": meta.get("title", ""),
	        "sdk": meta.get("sdk", ""),
	        "declared_models": meta.get("declared_models", []),
	        "tags": meta.get("tags", []),
	        "readme_segment": readme,
	        "app_file_name": meta.get("app_file", ""),
	        "app_file_segment": app,
	    }
	    return render_quest_prompt(**prompt_fields)


	def example(meta: dict, readme: str, app: str, matches: list[dict], *, variant: str) -> dict:
	    """Build one SFT example from project metadata, its two text segments and gold matches.

	    Every match is normalized before being handed to ``build_example``; the example is
	    tagged with the project id (``meta["id"]``, or ``""`` when absent) and ``variant``.
	    """
	    prompt = prompt_for(meta, readme, app)
	    gold = list(map(normalize_match, matches))
	    tags = {
	        "kind": "quest_classification",
	        "project_id": meta.get("id", ""),
	        "variant": variant,
	    }
	    return build_example(prompt, gold, meta=tags)


	# --- Synthetic README/app contradictions. In entries 1-5 the README screams
	# "local/offline" but the app clearly calls a proprietary cloud API, so Off the Grid
	# must NOT be awarded. Entry 6 is the reverse case: a terse README understates an app
	# that clearly runs a local model, so the app-sourced quests ARE awarded.
	# Each spec is passed to example() as its own `meta`, so "title", "tags", etc. are
	# rendered into the prompt alongside "readme" and "app".
	# NOTE(review): the titles of entries 4-6 describe the contradiction itself and end up
	# in the prompt — confirm that exposing the scenario to the model is intended. ---
	CONTRADICTIONS = [
	{
	"id": "synthetic/contradiction-1",
	"title": "PocketScribe — fully local notes",
	"declared_models": [],
	"tags": ["gradio"],
	"app_file": "app.py",
	"readme": "# PocketScribe\nPocketScribe is a 100% offline, fully local note-taking assistant. "
	"No API keys, no cloud, runs entirely on your own laptop for total privacy.",
	"app": "import gradio as gr\nfrom openai import OpenAI\nclient = OpenAI()\n\n"
	"def summarize(note):\n r = client.chat.completions.create(model='gpt-4o-mini', "
	"messages=[{'role':'user','content':note}])\n return r.choices[0].message.content\n\n"
	"gr.Interface(summarize, 'text', 'text').launch()",
	"matches": [
	{"quest": "Backyard AI", "confidence": 0.55, "evidence": "personal note-taking assistant", "source": "readme"},
	],
	},
	{
	"id": "synthetic/contradiction-2",
	"title": "HomeVet offline pet advisor",
	"declared_models": [],
	"tags": ["gradio", "pets"],
	"app_file": "app.py",
	"readme": "# HomeVet\nAn offline, local-first pet-care helper for my own dog. Works without the "
	"internet and keeps everything on-device. Built for a real person: my family.",
	"app": "import gradio as gr\nimport anthropic\nclient = anthropic.Anthropic()\n\n"
	"def advise(symptom):\n msg = client.messages.create(model='claude-3-5-sonnet-20241022', "
	"max_tokens=300, messages=[{'role':'user','content':symptom}])\n return msg.content[0].text\n\n"
	"with gr.Blocks() as demo:\n gr.Markdown('# HomeVet')\n inp = gr.Textbox()\n out = gr.Textbox()\n"
	" gr.Button('Ask').click(advise, inp, out)\ndemo.launch()",
	"matches": [
	{"quest": "Backyard AI", "confidence": 0.7, "evidence": "pet-care helper for my own dog", "source": "readme"},
	],
	},
	{
	"id": "synthetic/contradiction-3",
	"title": "GridFree storyteller",
	"declared_models": [],
	"tags": ["gradio", "story"],
	"app_file": "app.py",
	"readme": "# GridFree\nA delightful local, no-cloud bedtime-story generator. Runs off the grid, "
	"no proprietary APIs, entirely on your machine.",
	"app": "import gradio as gr, requests, os\n\nAPI='https://api.openai.com/v1/chat/completions'\n"
	"def story(theme):\n r=requests.post(API, headers={'Authorization':'Bearer '+os.environ['OPENAI_API_KEY']},"
	" json={'model':'gpt-4o','messages':[{'role':'user','content':theme}]})\n return r.json()\n\n"
	"gr.Interface(story,'text','text', css='.gradio-container{background:#102}').launch()",
	"matches": [
	{"quest": "Thousand Token Wood", "confidence": 0.6, "evidence": "bedtime-story generator", "source": "readme"},
	{"quest": "Off-Brand", "confidence": 0.5, "evidence": "custom css background styling", "source": "app_file"},
	],
	},
	{
	"id": "synthetic/contradiction-4",
	"title": "LocalLlama claim vs Gemini app",
	"declared_models": [],
	"tags": ["gradio"],
	"app_file": "app.py",
	"readme": "# QuietDesk\nRuns llama.cpp locally with GGUF weights — completely offline, your data never leaves "
	"the device. A calm local-first desktop assistant.",
	"app": "import gradio as gr\nimport google.generativeai as genai\ngenai.configure(api_key='...')\n"
	"model = genai.GenerativeModel('gemini-1.5-flash')\n\n"
	"def reply(q):\n return model.generate_content(q).text\n\n"
	"gr.ChatInterface(reply).launch()",
	"matches": [],
	},
	{
	"id": "synthetic/contradiction-5",
	"title": "Edge claim, cohere app",
	"declared_models": ["CohereForAI/command-r"],
	"tags": ["gradio"],
	"app_file": "app.py",
	"readme": "# EdgeMind\nEdgeMind is an on-device, fully local agent. No external services. Includes a write-up of "
	"every build decision in our field notes below.\n## Field Notes\nDay 1: chose a tiny model...",
	"app": "import gradio as gr, cohere\nco = cohere.Client('KEY')\n\n"
	"def run(q):\n return co.chat(message=q, model='command-r').text\n\n"
	"gr.Interface(run,'text','text').launch()",
	"matches": [
	{"quest": "Field Notes", "confidence": 0.7, "evidence": "write-up of every build decision", "source": "readme"},
	],
	},
	# Reverse case: the README says almost nothing, the app file carries all the signal.
	{
	"id": "synthetic/contradiction-6",
	"title": "README understates a clearly local app",
	"declared_models": ["openbmb/MiniCPM5-1B"],
	"tags": ["gradio"],
	"app_file": "app.py",
	"readme": "# Helper\nA small helper app. (No further description.)",
	"app": "import gradio as gr\nfrom llama_cpp import Llama\n"
	"llm = Llama.from_pretrained('openbmb/MiniCPM5-1B-GGUF', filename='*Q4_K_M.gguf')\n\n"
	"def chat(m):\n return llm.create_chat_completion(messages=[{'role':'user','content':m}])\n\n"
	"gr.Interface(chat,'text','text').launch()",
	"matches": [
	{"quest": "Off the Grid", "confidence": 0.85, "evidence": "local llama_cpp GGUF inference", "source": "app_file"},
	{"quest": "Llama Champion", "confidence": 0.9, "evidence": "from llama_cpp import Llama", "source": "app_file"},
	{"quest": "OpenBMB", "confidence": 0.85, "evidence": "openbmb/MiniCPM5-1B-GGUF", "source": "app_file"},
	{"quest": "Tiny Titan", "confidence": 0.75, "evidence": "MiniCPM5-1B is ~1B params", "source": "app_file"},
	],
	},
	]

	# A couple of fully-empty-signal samples beyond whatever empties occur naturally.
	# The specs carry no "matches" key: main() always pairs them with an empty gold list.
	EMPTY_SAMPLES = [
	# Template-style Space: boilerplate README link and a hello-world app.
	{
	"id": "synthetic/empty-1",
	"title": "My Build Small Hackathon",
	"declared_models": [],
	"tags": ["gradio", "region:us"],
	"app_file": "app.py",
	"readme": "Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference",
	"app": "import gradio as gr\n\ndef greet(name):\n return 'Hello ' + name\n\n"
	"gr.Interface(fn=greet, inputs='text', outputs='text').launch()",
	},
	# Placeholder project: stub README, no app file name, and the NO_APP placeholder segment.
	{
	"id": "synthetic/empty-2",
	"title": "todo",
	"declared_models": [],
	"tags": ["gradio"],
	"app_file": "",
	"readme": "todo",
	"app": NO_APP,
	},
	]


	# Real projects (kept in the corpus) whose app calls a REMOTE inference endpoint.
	# Their teacher labels already exclude Off the Grid; app-only variants force the model
	# to judge the remote-inference app directly instead of leaning on its strong prior.
	# main() skips any slug that is absent from the labels file or that was already
	# selected as a regular app-only variant.
	REMOTE_INFERENCE_SLUGS = [
	"GTROX", "ai-study-buddy", "come-and-compare", "AI-agent-Evaluation-pipeline",
	"Sprout-And-Spoon", "The-Shrine", "Backyard-Demo-Builder", "persona-atlas",
	"Structured-Data-Rescuer", "nutrilens", "ux-crime-scene", "wpl-discovery",
	"legawa", "business-order-assistant", "cloud-parade-cabinet", "gitopadesh",
	]


	# Hand-authored contrastive hard negatives for two observed failure modes:
	# (1) a REMOTE inference call (InferenceClient / endpoints / replicate / *.modal.run)
	# must NOT earn Off the Grid, whatever model it names;
	# (2) OpenBMB belongs only to openbmb/ models and Tiny Titan only to <=4B models,
	# so a non-openbmb / large model id must not trigger them. Positive anchors keep
	# the model from over-correcting on genuinely local openbmb / small models.
	# Each spec is passed to example() as its own `meta`, with "readme" and "app" supplying
	# the two text segments and "matches" the gold labels.
	HARD_NEGATIVES = [
	# --- remote inference: none of these may earn Off the Grid ---
	{
	"id": "synthetic/remote-gptoss-empty",
	"title": "Chat Demo", "declared_models": [], "tags": ["gradio"], "app_file": "app.py",
	"readme": "# Chat Demo\nA simple chat space.",
	"app": "import gradio as gr\nfrom huggingface_hub import InferenceClient\n"
	"client = InferenceClient(model=\"openai/gpt-oss-20b\")\n\n"
	"def respond(m, history):\n return client.chat_completion(m).choices[0].message.content\n\n"
	"gr.ChatInterface(respond).launch()",
	"matches": [],
	},
	{
	"id": "synthetic/remote-qwen-offbrand",
	"title": "NeonChat", "declared_models": [], "tags": ["gradio"], "app_file": "app.py",
	"readme": "# NeonChat\nA chat UI with a neon theme.",
	"app": "import gradio as gr\nfrom huggingface_hub import InferenceClient\n"
	"client = InferenceClient(model=\"Qwen/Qwen2.5-72B-Instruct\")\n"
	"CUSTOM_CSS = '.gradio-container{background:#0a0a14} .msg{box-shadow:0 0 12px #0ff}'\n\n"
	"def reply(m, h):\n return client.chat_completion(m).choices[0].message.content\n\n"
	"demo = gr.Blocks(css=CUSTOM_CSS)\n",
	"matches": [
	{"quest": "Off-Brand", "confidence": 0.78, "evidence": "gr.Blocks(css=CUSTOM_CSS) neon custom styling", "source": "app_file"},
	],
	},
	{
	"id": "synthetic/remote-endpoint-backyard",
	"title": "PillReader", "declared_models": [], "tags": ["gradio"], "app_file": "app.py",
	"readme": "# PillReader\nHelps my grandmother read the small print on her medication labels and "
	"set reminders, so she can manage her prescriptions without calling me every day.",
	"app": "import requests, gradio as gr\n"
	"ENDPOINT = \"https://abc123.endpoints.huggingface.cloud\"\n\n"
	"def read(image):\n return requests.post(ENDPOINT, files={'image': image}).json()['text']\n\n"
	"gr.Interface(read, 'image', 'text').launch()",
	"matches": [
	{"quest": "Backyard AI", "confidence": 0.85, "evidence": "helps my grandmother read medication labels", "source": "readme"},
	],
	},
	{
	"id": "synthetic/remote-replicate-ttw",
	"title": "DreamPostcards", "declared_models": [], "tags": ["gradio"], "app_file": "app.py",
	"readme": "# DreamPostcards\nA whimsical generator that turns a sentence about your day into a "
	"dreamy illustrated postcard from an imaginary seaside town.",
	"app": "import replicate, gradio as gr\n\n"
	"def make(prompt):\n return replicate.run('black-forest-labs/flux-schnell', input={'prompt': prompt})\n\n"
	"gr.Interface(make, 'text', 'image').launch()",
	"matches": [
	{"quest": "Thousand Token Wood", "confidence": 0.8, "evidence": "dreamy illustrated postcard generator", "source": "readme"},
	],
	},
	{
	"id": "synthetic/remote-together-empty",
	"title": "AskAnything", "declared_models": [], "tags": ["gradio"], "app_file": "app.py",
	"readme": "# AskAnything\nAsk a question.",
	"app": "import gradio as gr\nfrom together import Together\nclient = Together()\n\n"
	"def ask(q):\n return client.chat.completions.create(model='openai/gpt-oss-120b', "
	"messages=[{'role':'user','content':q}]).choices[0].message.content\n\n"
	"gr.Interface(ask, 'text', 'text').launch()",
	"matches": [],
	},
	{
	"id": "synthetic/remote-modalrun-modal",
	"title": "FastSummarizer", "declared_models": [], "tags": ["gradio"], "app_file": "app.py",
	"readme": "# FastSummarizer\nSummarizes long text. The model is served on Modal.",
	"app": "import requests, gradio as gr\n"
	"MODAL_URL = \"https://myorg--summarizer-serve.modal.run\"\n\n"
	"def summarize(text):\n return requests.post(MODAL_URL, json={'text': text}).json()['summary']\n\n"
	"gr.Interface(summarize, 'text', 'text').launch()",
	"matches": [
	{"quest": "Modal", "confidence": 0.85, "evidence": "model served at *.modal.run endpoint", "source": "app_file"},
	],
	},
	{
	"id": "synthetic/remote-gradioclient-empty",
	"title": "Proxy Chat", "declared_models": [], "tags": ["gradio"], "app_file": "app.py",
	"readme": "# Proxy Chat\nChat front-end.",
	"app": "import gradio as gr\nfrom gradio_client import Client\n"
	"client = Client(\"someorg/big-llm-space\")\n\n"
	"def chat(m):\n return client.predict(m, api_name='/chat')\n\n"
	"gr.Interface(chat, 'text', 'text').launch()",
	"matches": [],
	},
	{
	"id": "synthetic/remote-openrouter-empty",
	"title": "RouterBot", "declared_models": [], "tags": ["gradio"], "app_file": "app.py",
	"readme": "# RouterBot\nA chatbot.",
	"app": "import gradio as gr\nfrom openai import OpenAI\n"
	"client = OpenAI(base_url='https://openrouter.ai/api/v1', api_key='...')\n\n"
	"def reply(m):\n return client.chat.completions.create(model='meta-llama/llama-3.1-8b', "
	"messages=[{'role':'user','content':m}]).choices[0].message.content\n\n"
	"gr.Interface(reply, 'text', 'text').launch()",
	"matches": [],
	},
	# --- positive anchors: in-process local inference DOES earn Off the Grid; OpenBMB and
	# Tiny Titan appear only where the model id warrants them ---
	{
	"id": "synthetic/local-gptoss20b",
	"title": "LocalGPTOSS", "declared_models": ["openai/gpt-oss-20b"], "tags": ["gradio"], "app_file": "app.py",
	"readme": "# LocalGPTOSS\nRuns gpt-oss locally.",
	"app": "import gradio as gr\nimport torch\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n"
	"model = AutoModelForCausalLM.from_pretrained(\"openai/gpt-oss-20b\", torch_dtype='auto', device_map='cuda')\n"
	"tok = AutoTokenizer.from_pretrained(\"openai/gpt-oss-20b\")\n\n"
	"def gen(p):\n ids = tok(p, return_tensors='pt').to('cuda')\n return tok.decode(model.generate(**ids)[0])\n\n"
	"gr.Interface(gen, 'text', 'text').launch()",
	"matches": [
	{"quest": "Off the Grid", "confidence": 0.88, "evidence": "AutoModelForCausalLM.from_pretrained, in-process, no remote call", "source": "app_file"},
	],
	},
	{
	"id": "synthetic/local-qwen7b",
	"title": "Qwen7B Helper", "declared_models": ["Qwen/Qwen2.5-7B-Instruct"], "tags": ["gradio"], "app_file": "app.py",
	"readme": "# Qwen7B Helper\nA local assistant.",
	"app": "import gradio as gr\nfrom transformers import pipeline\n"
	"pipe = pipeline('text-generation', model=\"Qwen/Qwen2.5-7B-Instruct\", device_map='auto')\n\n"
	"def run(p):\n return pipe(p)[0]['generated_text']\n\n"
	"gr.Interface(run, 'text', 'text').launch()",
	"matches": [
	{"quest": "Off the Grid", "confidence": 0.85, "evidence": "local transformers pipeline, no remote inference", "source": "app_file"},
	],
	},
	{
	"id": "synthetic/local-llamacpp-qwen",
	"title": "Pocket Qwen", "declared_models": ["Qwen/Qwen2.5-7B-Instruct-GGUF"], "tags": ["gradio"], "app_file": "app.py",
	"readme": "# Pocket Qwen\nRuns a GGUF model on your laptop.",
	"app": "import gradio as gr\nfrom llama_cpp import Llama\n"
	"llm = Llama.from_pretrained(\"Qwen/Qwen2.5-7B-Instruct-GGUF\", filename=\"*Q4_K_M.gguf\")\n\n"
	"def chat(m):\n return llm.create_chat_completion(messages=[{'role':'user','content':m}])\n\n"
	"gr.Interface(chat, 'text', 'text').launch()",
	"matches": [
	{"quest": "Llama Champion", "confidence": 0.95, "evidence": "from llama_cpp import Llama GGUF weights", "source": "app_file"},
	{"quest": "Off the Grid", "confidence": 0.88, "evidence": "local llama_cpp GGUF inference, no remote call", "source": "app_file"},
	],
	},
	{
	"id": "synthetic/local-llama3b-tiny",
	"title": "Tiny Llama Buddy", "declared_models": ["meta-llama/Llama-3.2-3B-Instruct"], "tags": ["gradio"], "app_file": "app.py",
	"readme": "# Tiny Llama Buddy\nA small local helper.",
	"app": "import gradio as gr\nfrom transformers import AutoModelForCausalLM\n"
	"model = AutoModelForCausalLM.from_pretrained(\"meta-llama/Llama-3.2-3B-Instruct\", device_map='cuda')\n\n"
	"def gen(p):\n return model_generate(p)\n\n"
	"gr.Interface(gen, 'text', 'text').launch()",
	"matches": [
	{"quest": "Off the Grid", "confidence": 0.85, "evidence": "local from_pretrained, in-process inference", "source": "app_file"},
	{"quest": "Tiny Titan", "confidence": 0.82, "evidence": "Llama-3.2-3B is a 3B model", "source": "app_file"},
	],
	},
	{
	"id": "synthetic/local-openbmb-positive",
	"title": "Pocket MiniCPM", "declared_models": ["openbmb/MiniCPM5-1B-GGUF"], "tags": ["gradio"], "app_file": "app.py",
	"readme": "# Pocket MiniCPM\nRuns MiniCPM locally via llama.cpp.",
	"app": "import gradio as gr\nfrom llama_cpp import Llama\n"
	"llm = Llama.from_pretrained(\"openbmb/MiniCPM5-1B-GGUF\", filename=\"*Q4_K_M.gguf\")\n\n"
	"def chat(m):\n return llm.create_chat_completion(messages=[{'role':'user','content':m}])\n\n"
	"gr.Interface(chat, 'text', 'text').launch()",
	"matches": [
	{"quest": "Llama Champion", "confidence": 0.95, "evidence": "from llama_cpp import Llama", "source": "app_file"},
	{"quest": "OpenBMB", "confidence": 0.95, "evidence": "openbmb/MiniCPM5-1B-GGUF model", "source": "app_file"},
	{"quest": "Off the Grid", "confidence": 0.9, "evidence": "local llama_cpp GGUF, no remote call", "source": "app_file"},
	{"quest": "Tiny Titan", "confidence": 0.82, "evidence": "MiniCPM5-1B is a 1B model", "source": "app_file"},
	],
	},
	{
	"id": "synthetic/local-minicpmv-positive",
	"title": "Vision Notes", "declared_models": ["openbmb/MiniCPM-V-4_6"], "tags": ["gradio"], "app_file": "app.py",
	"readme": "# Vision Notes\nReads images with MiniCPM-V locally.",
	"app": "import gradio as gr\nfrom transformers import AutoModel\n"
	"model = AutoModel.from_pretrained(\"openbmb/MiniCPM-V-4_6\", trust_remote_code=True, device_map='cuda')\n\n"
	"def caption(img):\n return model.chat(image=img, msgs=[])\n\n"
	"gr.Interface(caption, 'image', 'text').launch()",
	"matches": [
	{"quest": "OpenBMB", "confidence": 0.95, "evidence": "openbmb/MiniCPM-V-4_6 model", "source": "app_file"},
	{"quest": "Off the Grid", "confidence": 0.88, "evidence": "local AutoModel.from_pretrained, no remote call", "source": "app_file"},
	],
	},
	]


	_REMOTE_RE = re.compile(
	r"InferenceClient\|endpoints\.huggingface\|\breplicate\b\|\btogether\b\|openrouter\|gradio_client\|"
	r"\.modal\.run\|api\.openai\|api\.anthropic\|generativeai\|cohere\.Client",
	re.I,
	)
	# OpenBMB == the openbmb org or its MiniCPM/OpenCPM family (the award is "use their model").
	_OPENBMB_RE = re.compile(r"openbmb/\|minicpm\|opencpm", re.I)


	def _check_invariants(examples: list[dict]) -> None:
	"""Fail the build on the crisp gold violations behind the GTROX failure modes:
	a remote inference call must not earn Off the Grid, and OpenBMB belongs only to
	openbmb / MiniCPM-family models. (A reliable >4B check for Tiny Titan is left to
	the labeller — parameter counts in code are too noisy: 1.7B, commented models,
	multi-model apps all defeat a regex.)"""
	problems: list[str] = []
	for e in examples:
	user = e["messages"][1]["content"]
	body = user.split("METADATA:", 1)[-1] # skip the quest list so its prose can't false-positive
	app = body.split("[APP_FILE]", 1)[-1]
	quests = {m["quest"] for m in json.loads(e["messages"][2]["content"])["matches"]}
	pid = e.get("project_id", "?")
	if _REMOTE_RE.search(app) and "Off the Grid" in quests:
	problems.append(f"{pid}: remote inference in app but Off the Grid awarded")
	if "OpenBMB" in quests and not _OPENBMB_RE.search(body):
	problems.append(f"{pid}: OpenBMB awarded without an openbmb / MiniCPM model in the content")
	if problems:
	raise SystemExit("invariant violations:\n " + "\n ".join(problems))


	def _rank_by_source(by_slug: dict, source: str) -> list[tuple]:
	    """Return ``(slug, meta, matches)`` triples having at least one match from ``source``.

	    Triples are ordered by descending number of ``source`` matches; ties keep the
	    insertion order of ``by_slug`` because the sort is stable.
	    """
	    def hits(matches: list[dict]) -> int:
	        return sum(1 for m in matches if m["source"] == source)

	    ranked = [(slug, meta, matches) for slug, (meta, matches) in by_slug.items() if hits(matches)]
	    ranked.sort(key=lambda triple: -hits(triple[2]))
	    return ranked


	def _from_source(matches: list[dict], source: str) -> list[dict]:
	    """Keep only the matches whose ``"source"`` field equals ``source``."""
	    return [m for m in matches if m["source"] == source]


	def main() -> None:
	    """Assemble the SFT dataset, validate it, write it out and print a coverage report.

	    Examples are appended in a fixed order: one natural example per labeled project,
	    then app-only, missing-app-file and noisy-metadata augmentations, the synthetic
	    contradictions and empties, app-only variants of the remote-inference projects,
	    and finally the hand-authored hard negatives. The invariants are checked and the
	    serialized dataset is re-parsed before anything is written to disk.
	    """
	    parser = argparse.ArgumentParser(description="Assemble the quest SFT dataset.")
	    parser.add_argument("--labels", default="data/quest_labels/labeled.json", type=Path)
	    parser.add_argument("--out", default="data/quest_sft.jsonl", type=Path)
	    parser.add_argument("--app-only", type=int, default=16)
	    parser.add_argument("--readme-only", type=int, default=16)
	    parser.add_argument("--noisy", type=int, default=8)
	    args = parser.parse_args()

	    labeled = json.loads(args.labels.read_text(encoding="utf-8"))
	    # The labels file is either a bare list of rows or an object wrapping them in "results".
	    rows = labeled["results"] if isinstance(labeled, dict) else labeled
	    examples: list[dict] = []
	    counts: dict[str, int] = {}

	    def add(ex: dict) -> None:
	        examples.append(ex)
	        counts[ex["variant"]] = counts.get(ex["variant"], 0) + 1

	    # 1) natural example per labeled project
	    by_slug: dict[str, tuple[dict, list[dict]]] = {}
	    for row in rows:
	        slug = row["slug"]
	        meta = load_input(slug)
	        matches = row.get("matches") or []
	        by_slug[slug] = (meta, matches)
	        add(example(meta, meta["README"], meta["APP_FILE"], matches, variant="natural"))

	    # rank projects by richness of each source for augmentation selection
	    app_rich = _rank_by_source(by_slug, "app_file")
	    readme_rich = _rank_by_source(by_slug, "readme")
	    app_only_picks = app_rich[: args.app_only]

	    # 2) app-only: strip README, keep only app_file-sourced matches
	    for _, meta, ms in app_only_picks:
	        add(example(meta, NO_README, meta["APP_FILE"], _from_source(ms, "app_file"), variant="app_only"))

	    # 3) readme-only / missing app file: blank the app file, keep only readme-sourced matches
	    for _, meta, ms in readme_rich[: args.readme_only]:
	        add(example(meta, meta["README"], NO_APP, _from_source(ms, "readme"), variant="missing_app_file"))

	    # 4) noisy metadata: inject garbled tags + scrambled title, gold unchanged
	    noisy_pool = sorted(
	        ((s, m, ms) for s, (m, ms) in by_slug.items() if ms),
	        key=lambda t: -len(t[2]),
	    )
	    for _, meta, ms in noisy_pool[: args.noisy]:
	        noisy_meta = dict(meta)  # shallow copy so the stored meta keeps its own tags/title
	        noisy_meta["tags"] = list(meta.get("tags", [])) + ["asdf123", "xx", "region:us", "untitled", "draft"]
	        noisy_meta["title"] = (meta.get("title", "") + " ::: TODO copy of template (do not read title)").strip()
	        add(example(noisy_meta, meta["README"], meta["APP_FILE"], ms, variant="noisy_metadata"))

	    # 5) synthetic contradictions
	    for spec in CONTRADICTIONS:
	        add(example(spec, spec["readme"], spec["app"], spec["matches"], variant="contradiction"))

	    # 6) explicit empties
	    for spec in EMPTY_SAMPLES:
	        add(example(spec, spec["readme"], spec["app"], [], variant="empty"))

	    # 7) app-only variants of the real remote-inference projects (forces judging the
	    # remote app directly; their gold already excludes Off the Grid)
	    covered_app_only = {s for s, _, _ in app_only_picks}
	    for slug in REMOTE_INFERENCE_SLUGS:
	        if slug not in by_slug or slug in covered_app_only:
	            continue
	        meta, ms = by_slug[slug]
	        add(example(meta, NO_README, meta["APP_FILE"], _from_source(ms, "app_file"), variant="remote_app_only"))

	    # 8) hand-authored contrastive hard negatives (remote!=local; org-prefix gates)
	    for spec in HARD_NEGATIVES:
	        add(example(spec, spec["readme"], spec["app"], spec["matches"], variant="hard_negative"))

	    _check_invariants(examples)

	    text = build_dataset_jsonl(examples, source_note="build_small_hackathon real projects + targeted augmentations")
	    manifest, parsed = parse_quest_dataset_jsonl(text)  # validates the whole file
	    # Create the destination directory if needed, so a fresh checkout or a custom --out
	    # path does not fail only after all examples have been built and validated.
	    args.out.parent.mkdir(parents=True, exist_ok=True)
	    args.out.write_text(text, encoding="utf-8")

	    print(f"wrote {len(parsed)} examples to {args.out}")
	    print("variant counts:", json.dumps(counts, ensure_ascii=False))
	    print("empty-match examples:", manifest["empty_match_examples"])
	    print("quest positive counts:")
	    for quest, n in sorted(manifest["quest_positive_counts"].items(), key=lambda kv: -kv[1]):
	        print(f" {n:3d} {quest}")


	if __name__ == "__main__":
	main()