Spaces:

eventhorizon28
/

cadforge

Sleeping

App Files Files Community

cadforge / scripts /generate_tasks_thomasmaker.py

eventhorizon28

Upload folder using huggingface_hub

7c72eb2 verified about 1 month ago

raw

history blame contribute delete

11.5 kB

	#!/usr/bin/env python3
	"""Generate task files from the ThomasTheMaker/cadquery HuggingFace dataset.

	Pipeline per example:
	1. Execute CadQuery code -> get shape
	2. Run preprocess_from_code -> ground_truth.step, ground_truth.json, .npy
	3. Send HF image + code + ground_truth.json to Claude Sonnet -> get NL prompt
	4. Write task.json, reference_code.py

	Usage:
	python scripts/generate_tasks_thomasmaker.py --limit 3 # test on 3
	python scripts/generate_tasks_thomasmaker.py # all 50
	python scripts/generate_tasks_thomasmaker.py --dry-run # just show prompts
	"""
	import argparse
	import base64
	import io
	import json
	import logging
	import math
	import os
	import re
	import sys
	import time
	import traceback
	from pathlib import Path

	# Put the parent of this script's directory first on sys.path; the
	# function-level `from server... import ...` statements further down
	# presumably rely on this to resolve the `server` package.
	sys.path.insert(0, str(Path(__file__).parent.parent))

	# Configure logging at INFO level with timestamped records.
	logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
	logger = logging.getLogger(__name__)

	# Directory under which one sub-directory per generated task is created.
	TASKS_ROOT = Path(__file__).parent.parent / "server" / "tasks"
	# JSON file listing the dataset examples to process (loaded in main()).
	SELECTED_PATH = Path(__file__).parent.parent / "selected_50.json"
	# Default number for the first generated task (default of --start-num).
	START_TASK_NUM = 21

	def _load_api_key():
	    """Return the Anthropic API key, or an empty string when none is found.

	    Lookup order:
	      1. Environment variable ``ANTHROPIC_API_KEY``, then ``ANTHROPIC_KEY``.
	      2. The ``.env`` file three directory levels above this script: the
	         first ``ANTHROPIC_KEY=`` / ``ANTHROPIC_API_KEY=`` line that carries
	         a non-empty value. Surrounding whitespace and quotes are stripped,
	         and an optional leading ``export `` is tolerated.

	    An assignment with an empty value is skipped rather than returned, so a
	    blank ``ANTHROPIC_KEY=`` line cannot hide a populated
	    ``ANTHROPIC_API_KEY=`` line further down the file.
	    """
	    key = os.environ.get("ANTHROPIC_API_KEY") or os.environ.get("ANTHROPIC_KEY")
	    if key:
	        return key

	    env_path = Path(__file__).parent.parent.parent / ".env"
	    # is_file() rather than exists(): a directory named ".env" cannot be read.
	    if not env_path.is_file():
	        return ""

	    prefixes = ("ANTHROPIC_KEY=", "ANTHROPIC_API_KEY=")
	    for line in env_path.read_text().splitlines():
	        line = line.strip()
	        if line.startswith("export "):
	            line = line[len("export "):].lstrip()
	        if not line.startswith(prefixes):
	            continue
	        value = line.split("=", 1)[1].strip().strip("'\"")
	        if value:
	            return value
	    return ""

	# Resolved once at import time; an empty string means no key was found
	# (main() refuses to run in that case).
	ANTHROPIC_API_KEY = _load_api_key()

	# System prompt sent with every request made by call_claude_for_prompt().
	PROMPT_SYSTEM = """You are a CAD engineering assistant. You will be shown an image of a 3D CAD part along with its geometric properties. Your job is to write a clear, detailed natural language description that a CAD engineer could use to recreate this exact part in CadQuery.

	Rules:
	- Write in natural language, NOT code. Do not use any code syntax.
	- Be specific about dimensions in millimeters (use the bbox and geometry data provided).
	- Describe the construction steps: what base shape, what features are added/removed, what boolean operations.
	- Mention the number and types of faces if it helps clarify the geometry.
	- Describe holes, bosses, pockets, curved surfaces, arcs, fillets if visible.
	- Mention symmetry if present.
	- End with "Orient the longest axis along X."
	- Keep it to one clear paragraph, 3-8 sentences.
	- Do NOT start with "Create a" - vary your openings."""


	def image_to_base64(pil_image):
	    """Encode *pil_image* as PNG and return those bytes as base64 text.

	    The argument only needs a ``save(file_obj, format=...)`` method that
	    writes the encoded image into the given binary file object.
	    """
	    png_buffer = io.BytesIO()
	    pil_image.save(png_buffer, format="PNG")
	    encoded = base64.b64encode(png_buffer.getvalue())
	    return encoded.decode("utf-8")


	def call_claude_for_prompt(pil_image, gt_json, code, code_info):
	    """Ask Claude for a natural-language description of one CAD part.

	    A single user message is sent containing the part image (PNG, base64)
	    followed by a text block with the ground-truth JSON, the CadQuery
	    source and the operation counts read from ``code_info``.
	    ``PROMPT_SYSTEM`` is used as the system prompt.

	    Args:
	        pil_image: Image object accepted by ``image_to_base64``.
	        gt_json: JSON-serializable ground-truth geometry data.
	        code: CadQuery source text embedded verbatim in the message.
	        code_info: Mapping queried with ``.get`` for ``extrudes``,
	            ``unions``, ``cuts``, ``arcs`` and ``circles`` (default 0).

	    Returns:
	        The stripped text of the first content block of the response.
	    """
	    import anthropic

	    client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)
	    img_b64 = image_to_base64(pil_image)

	    user_text = f"""Here is the ground truth geometry data for this 3D part:

	{json.dumps(gt_json, indent=2)}

	Here is the CadQuery code that generates this part:

	```python
	{code}
	```

	Additional construction info:
	- Number of extrusions: {code_info.get('extrudes', 0)}
	- Number of union operations: {code_info.get('unions', 0)}
	- Number of cut operations: {code_info.get('cuts', 0)}
	- Number of arc segments: {code_info.get('arcs', 0)}
	- Number of circular features: {code_info.get('circles', 0)}

	Look at the image carefully along with the ground truth data and the code above. Write a detailed natural language prompt describing this part so someone could recreate it in CadQuery."""

	    # The image block precedes the text block inside the user message.
	    image_block = {
	        "type": "image",
	        "source": {
	            "type": "base64",
	            "media_type": "image/png",
	            "data": img_b64,
	        },
	    }
	    text_block = {"type": "text", "text": user_text}
	    conversation = [{"role": "user", "content": [image_block, text_block]}]

	    # Only the API round trip is timed.
	    t0 = time.time()
	    response = client.messages.create(
	        model="claude-sonnet-4-20250514",
	        max_tokens=700,
	        system=PROMPT_SYSTEM,
	        messages=conversation,
	    )
	    elapsed = time.time() - t0

	    prompt_text = response.content[0].text.strip()
	    logger.info(f" Claude API call took {elapsed:.1f}s, prompt length={len(prompt_text)}")
	    return prompt_text


	def execute_code(code):
	    """Execute CadQuery source and return ``(shape, adapted_code)``.

	    If no line of the script starts with ``result``, a trailing
	    ``result = <name>`` line is appended, where ``<name>`` is the last
	    variable assigned at the start of a line whose name begins with
	    ``solid`` (``solid``, ``solid1``, ``solid10``, ...). When no such
	    assignment exists the line ``result = solid`` is appended anyway, so a
	    script defining neither name fails with ``NameError`` during execution.

	    Args:
	        code: CadQuery source text. It is executed with ``cq``, ``cadquery``
	            and ``math`` predefined in its namespace.

	    Returns:
	        Tuple ``(shape, adapted)``: ``shape`` is ``result.val()`` when
	        ``result`` has a ``val`` attribute, otherwise ``result`` itself;
	        ``adapted`` is the source that was actually executed.

	    Raises:
	        ValueError: ``result`` is missing or ``None`` after execution, or
	            all three bounding-box extents are below 1e-6.
	        Exception: whatever the executed code itself raises.
	    """
	    import cadquery as cq

	    adapted = code.rstrip()
	    # ^result with MULTILINE covers both "\nresult..." and a script whose
	    # very first line assigns result (a plain "\nresult" search misses that).
	    if not re.search(r"^result", adapted, re.MULTILINE):
	        # \w* and optional blanks so solid, solid1, solid10 and "solid1=..."
	        # are all recognized; (?!=) skips "==" comparisons.
	        solid_names = re.findall(r"^(solid\w*)[ \t]*=(?!=)", adapted, re.MULTILINE)
	        fallback = solid_names[-1] if solid_names else "solid"
	        adapted += f"\nresult = {fallback}"

	    # NOTE(security): exec() runs the dataset-provided source with the full
	    # privileges of this process. Only feed it code from a trusted source.
	    local_ns = {"cq": cq, "cadquery": cq, "math": math}
	    exec(adapted, local_ns)

	    result = local_ns.get("result")
	    if result is None:
	        raise ValueError("No 'result' variable after execution")

	    shape = result.val() if hasattr(result, "val") else result

	    bb = shape.BoundingBox()
	    if bb.xlen < 1e-6 and bb.ylen < 1e-6 and bb.zlen < 1e-6:
	        raise ValueError("Degenerate shape (zero bbox)")

	    return shape, adapted


	def analyze_geometry(shape, code_info):
	    """Summarize a shape's geometry together with its construction counts.

	    Args:
	        shape: Object accepted by ``server.geometry.extract_properties`` and
	            exposing ``BoundingBox()`` with ``xlen``/``ylen``/``zlen``.
	        code_info: Mapping queried with ``.get`` for ``extrudes``,
	            ``unions``, ``cuts``, ``arcs`` and ``circles`` (default 0).

	    Returns:
	        A dict holding ``bbox`` (the three extents rounded to 4 decimals),
	        selected entries of the extracted properties under shorter names
	        (with defaults when absent), and the five operation counts.
	    """
	    from server.geometry import extract_properties

	    props = extract_properties(shape)
	    bb = shape.BoundingBox()

	    summary = {"bbox": [round(extent, 4) for extent in (bb.xlen, bb.ylen, bb.zlen)]}

	    # (output key, key in the extracted properties, default when absent)
	    geometry_fields = (
	        ("volume", "volume_mm3", 0),
	        ("surface_area", "surface_area_mm2", 0),
	        ("face_count", "face_count", 0),
	        ("face_type_counts", "face_type_counts", {}),
	        ("dominant_face_type", "dominant_face_type", ""),
	        ("euler", "euler_characteristic", 2),
	        ("shape_class", "shape_class", "COMPLEX_SOLID"),
	        ("edge_count", "edge_count", 0),
	        ("vertex_count", "vertex_count", 0),
	        ("has_xy_symmetry", "has_xy_symmetry", False),
	        ("has_xz_symmetry", "has_xz_symmetry", False),
	        ("has_yz_symmetry", "has_yz_symmetry", False),
	    )
	    for out_key, prop_key, default in geometry_fields:
	        summary[out_key] = props.get(prop_key, default)

	    for op_key in ("extrudes", "unions", "cuts", "arcs", "circles"):
	        summary[op_key] = code_info.get(op_key, 0)

	    return summary


	def difficulty_bin(label, score):
	    """Map a difficulty label and complexity score to a bin number.

	    Each label owns a three-wide band: ``"medium"`` yields 3-5, ``"hard"``
	    yields 5-7 and any other label yields 7-9. Within a band the bin rises
	    by one for every ``width`` points of ``score`` above the band's pivot
	    (floor division), clamped to the band limits.
	    """
	    # label -> (lowest bin, score at which the band starts, points per bin)
	    bands = (
	        ("medium", (3, 10, 3)),
	        ("hard", (5, 19, 6)),
	    )
	    lowest, pivot, width = next(
	        (params for name, params in bands if label == name),
	        (7, 35, 15),
	    )
	    highest = lowest + 2
	    unclamped = lowest + (score - pivot) // width
	    return min(highest, max(lowest, unclamped))


	def generate_one_task(ds, info, task_num, dry_run=False):
	    """Generate the task directory for one selected dataset example.

	    Steps, each aborting with a failure record when it raises:
	      1. Execute the example's CadQuery code (``execute_code``).
	      2. Create the task directory and write ``reference_code.py``.
	      3. Run ``server.preprocessor.preprocess_from_code`` and read back
	         the ``ground_truth.json`` it is expected to leave in the directory.
	      4. Obtain the natural-language prompt (``call_claude_for_prompt``).
	      5. Write ``task.json``.

	    Args:
	        ds: Indexable dataset; ``ds[idx]["texts"][0]["assistant"]`` is the
	            code and ``ds[idx]["images"][0]`` the image.
	        info: Mapping with ``idx``, ``difficulty_label`` and ``score``; it
	            is also passed on as ``code_info`` for the prompt request.
	        task_num: Number embedded in the task id (zero-padded to 3 digits).
	        dry_run: Accepted but not consulted anywhere in this function.
	            NOTE(review): files are written and the API is called regardless.

	    Returns:
	        A dict with ``task_id``, ``success`` and ``elapsed_s``; failures add
	        ``error`` (prefixed ``exec:``, ``gt:`` or ``prompt:``), successes
	        add ``prompt_preview``, ``volume`` and ``face_count``.
	    """
	    t0 = time.time()

	    def failed(stage, exc):
	        # Uniform failure record; elapsed time is measured when it is built.
	        return {"task_id": task_id, "success": False, "error": f"{stage}: {exc}",
	                "elapsed_s": round(time.time() - t0, 2)}

	    idx = info["idx"]
	    row = ds[idx]
	    code = row["texts"][0]["assistant"]
	    pil_image = row["images"][0]
	    label = info["difficulty_label"]

	    task_id = f"task_{task_num:03d}_hf_{idx}"
	    task_dir = TASKS_ROOT / task_id

	    logger.info(f"[{task_num}] Processing {task_id} (hf_idx={idx}, score={info['score']}, {label})")

	    # Step 1: the code must run before anything is written to disk.
	    try:
	        _shape, adapted_code = execute_code(code)
	        logger.info(" Code executed OK")
	    except Exception as e:
	        logger.error(f" EXEC FAIL: {e}")
	        return failed("exec", e)

	    # Step 2: persist the code that was actually executed.
	    task_dir.mkdir(parents=True, exist_ok=True)
	    with (task_dir / "reference_code.py").open("w") as code_file:
	        code_file.write(adapted_code)

	    # Step 3: ground-truth artifacts.
	    try:
	        from server.preprocessor import preprocess_from_code
	        gt = preprocess_from_code(adapted_code, str(task_dir), task_id=task_id)
	        logger.info(f" GT OK: vol={gt.get('volume_mm3')}, bbox={gt.get('bbox_mm')}")
	    except Exception as e:
	        logger.error(f" GT FAIL: {e}")
	        logger.error(traceback.format_exc())
	        return failed("gt", e)

	    with (task_dir / "ground_truth.json").open() as gt_file:
	        gt_json = json.load(gt_file)

	    # Step 4: natural-language prompt (uses the original, unadapted code).
	    try:
	        nl_prompt = call_claude_for_prompt(pil_image, gt_json, code, info)
	    except Exception as e:
	        logger.error(f" PROMPT FAIL: {e}")
	        return failed("prompt", e)

	    # Step 5: task descriptor.
	    score = info["score"]
	    step_budget = {"medium": 20, "hard": 25}.get(label, 30)
	    task_json = {
	        "id": task_id,
	        "part_class": gt_json.get("dominant_face_type", "complex").lower(),
	        "difficulty_bin": difficulty_bin(label, score),
	        "max_steps": step_budget,
	        "prompt": nl_prompt,
	        "ground_truth_step": f"tasks/{task_id}/ground_truth.step",
	        "ground_truth_json": f"tasks/{task_id}/ground_truth.json",
	        "reference_code": f"tasks/{task_id}/reference_code.py",
	        "source": "ThomasTheMaker/cadquery",
	        "hf_index": idx,
	        "complexity_score": info["score"],
	        "difficulty_label": label,
	    }
	    with (task_dir / "task.json").open("w") as task_file:
	        json.dump(task_json, task_file, indent=2)

	    elapsed = round(time.time() - t0, 2)
	    logger.info(f" DONE {task_id} ({elapsed}s)")
	    return {
	        "task_id": task_id,
	        "success": True,
	        "elapsed_s": elapsed,
	        "prompt_preview": nl_prompt[:150],
	        "volume": gt.get("volume_mm3"),
	        "face_count": gt.get("face_count"),
	    }


	def main(argv=None):
	    """Generate tasks for the selected examples and write a JSON report.

	    Command-line options:
	      --dry-run    forwarded to ``generate_one_task`` as ``dry_run``.
	      --limit N    process only the first N selected examples; ``0``
	                   processes none, a negative value is rejected.
	      --start-num  number of the first task (default ``START_TASK_NUM``);
	                   later tasks are numbered consecutively.

	    Args:
	        argv: Optional argument list; ``None`` (the default) makes argparse
	            read ``sys.argv[1:]``.

	    Returns:
	        0 when every example succeeded, 1 when the API key is missing or at
	        least one example failed.
	    """
	    parser = argparse.ArgumentParser()
	    parser.add_argument("--dry-run", action="store_true")
	    parser.add_argument("--limit", type=int, default=None)
	    parser.add_argument("--start-num", type=int, default=START_TASK_NUM)
	    args = parser.parse_args(argv)

	    # A negative slice bound would silently drop examples from the tail.
	    if args.limit is not None and args.limit < 0:
	        parser.error("--limit must be >= 0")

	    t0_total = time.time()

	    if not ANTHROPIC_API_KEY:
	        logger.error("ANTHROPIC_API_KEY not set")
	        return 1

	    with open(SELECTED_PATH) as f:
	        selected = json.load(f)
	    logger.info(f"Loaded {len(selected)} selected examples")

	    # "is not None" so that --limit 0 means "process nothing" instead of
	    # being mistaken for "no limit given".
	    if args.limit is not None:
	        selected = selected[:args.limit]

	    from datasets import load_dataset
	    logger.info("Loading HF dataset...")
	    ds = load_dataset("ThomasTheMaker/cadquery", split="train")
	    logger.info(f"Loaded {len(ds)} rows")

	    results = []
	    success_count = 0
	    fail_count = 0

	    for i, info in enumerate(selected):
	        # Task numbers advance for failed examples too.
	        r = generate_one_task(ds, info, args.start_num + i, dry_run=args.dry_run)
	        results.append(r)
	        if r.get("success"):
	            success_count += 1
	        else:
	            fail_count += 1

	        if (i + 1) % 5 == 0:
	            logger.info(f"=== Progress: {i+1}/{len(selected)} (ok={success_count}, fail={fail_count}) ===")

	    elapsed_total = time.time() - t0_total
	    print("\n" + "=" * 80)
	    print(f"DONE: {success_count}/{len(selected)} succeeded, {fail_count} failed")
	    print(f"Total time: {elapsed_total:.1f}s ({elapsed_total/60:.1f}m)")

	    report_path = TASKS_ROOT.parent / "thomasmaker_generation_report.json"
	    with open(report_path, "w") as f:
	        json.dump(results, f, indent=2)
	    print(f"Report: {report_path}")

	    return 0 if fail_count == 0 else 1


	if __name__ == "__main__":
	    # Use main()'s return value as the process exit status.
	    raise SystemExit(main())