cadforge / scripts /generate_tasks_thomasmaker.py
eventhorizon28's picture
Upload folder using huggingface_hub
7c72eb2 verified
#!/usr/bin/env python3
"""Generate task files from the ThomasTheMaker/cadquery HuggingFace dataset.
Pipeline per example:
1. Execute CadQuery code -> get shape
2. Run preprocess_from_code -> ground_truth.step, ground_truth.json, .npy
3. Send HF image + code + ground_truth.json to Claude Sonnet -> get NL prompt
4. Write task.json, reference_code.py
Usage:
python scripts/generate_tasks_thomasmaker.py --limit 3 # test on 3
python scripts/generate_tasks_thomasmaker.py # all 50
python scripts/generate_tasks_thomasmaker.py --dry-run # just show prompts
"""
import argparse
import base64
import io
import json
import logging
import math
import os
import re
import sys
import time
import traceback
from pathlib import Path
# Repository root: two levels up from this script file.
_REPO_ROOT = Path(__file__).parent.parent

# Put the repository root first on sys.path so the function-level
# ``server.*`` imports used further down resolve when run as a script.
sys.path.insert(0, str(_REPO_ROOT))

logging.basicConfig(
    format="%(asctime)s %(levelname)s %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Directory under which one sub-directory per generated task is created.
TASKS_ROOT = _REPO_ROOT / "server" / "tasks"
# JSON file listing the dataset examples to turn into tasks.
SELECTED_PATH = _REPO_ROOT / "selected_50.json"
# Default number given to the first generated task (overridable via --start-num).
START_TASK_NUM = 21
def _load_api_key():
    """Return the Anthropic API key, or "" when none is configured.

    Lookup order: the ``ANTHROPIC_API_KEY`` environment variable, then
    ``ANTHROPIC_KEY``, then the first ``ANTHROPIC_KEY=`` /
    ``ANTHROPIC_API_KEY=`` line of the ``.env`` file located at
    ``Path(__file__).parent.parent.parent``.
    """
    for var_name in ("ANTHROPIC_API_KEY", "ANTHROPIC_KEY"):
        from_env = os.environ.get(var_name)
        if from_env:
            return from_env

    dotenv = Path(__file__).parent.parent.parent / ".env"
    if not dotenv.exists():
        return ""

    wanted_prefixes = ("ANTHROPIC_KEY=", "ANTHROPIC_API_KEY=")
    for raw_line in dotenv.read_text().splitlines():
        entry = raw_line.strip()
        if entry.startswith(wanted_prefixes):
            # Keep everything after the first "=", minus whitespace and quotes.
            _, _, value = entry.partition("=")
            return value.strip().strip("'\"")
    return ""
# API key resolved once at import time; "" when no key could be found
# (main() refuses to run in that case).
ANTHROPIC_API_KEY = _load_api_key()
# System prompt sent with every Claude request made by call_claude_for_prompt().
# It is runtime text: edits here change what the model is asked to produce.
PROMPT_SYSTEM = """You are a CAD engineering assistant. You will be shown an image of a 3D CAD part along with its geometric properties. Your job is to write a clear, detailed natural language description that a CAD engineer could use to recreate this exact part in CadQuery.
Rules:
- Write in natural language, NOT code. Do not use any code syntax.
- Be specific about dimensions in millimeters (use the bbox and geometry data provided).
- Describe the construction steps: what base shape, what features are added/removed, what boolean operations.
- Mention the number and types of faces if it helps clarify the geometry.
- Describe holes, bosses, pockets, curved surfaces, arcs, fillets if visible.
- Mention symmetry if present.
- End with "Orient the longest axis along X."
- Keep it to one clear paragraph, 3-8 sentences.
- Do NOT start with "Create a" - vary your openings."""
def image_to_base64(pil_image):
    """Serialize *pil_image* as PNG and return those bytes base64-encoded as text.

    *pil_image* only needs a ``save(file_obj, format=...)`` method.
    """
    with io.BytesIO() as png_buffer:
        pil_image.save(png_buffer, format="PNG")
        png_bytes = png_buffer.getvalue()
    encoded = base64.standard_b64encode(png_bytes)
    return encoded.decode("utf-8")
def call_claude_for_prompt(pil_image, gt_json, code, code_info):
    """Ask Claude for a natural-language description of one CAD part.

    Args:
        pil_image: Rendered image of the part; sent as a base64 PNG.
        gt_json: Ground-truth geometry data, embedded as indented JSON.
        code: CadQuery source that generates the part.
        code_info: Mapping read for the ``extrudes``, ``unions``, ``cuts``,
            ``arcs`` and ``circles`` counts (each defaults to 0).

    Returns:
        The stripped text of the first content block of the response.
    """
    import anthropic

    client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)
    img_b64 = image_to_base64(pil_image)
    user_text = f"""Here is the ground truth geometry data for this 3D part:
{json.dumps(gt_json, indent=2)}
Here is the CadQuery code that generates this part:
```python
{code}
```
Additional construction info:
- Number of extrusions: {code_info.get('extrudes', 0)}
- Number of union operations: {code_info.get('unions', 0)}
- Number of cut operations: {code_info.get('cuts', 0)}
- Number of arc segments: {code_info.get('arcs', 0)}
- Number of circular features: {code_info.get('circles', 0)}
Look at the image carefully along with the ground truth data and the code above. Write a detailed natural language prompt describing this part so someone could recreate it in CadQuery."""

    # One user turn made of two content blocks: the image first, then the text.
    image_block = {
        "type": "image",
        "source": {"type": "base64", "media_type": "image/png", "data": img_b64},
    }
    text_block = {"type": "text", "text": user_text}
    conversation = [{"role": "user", "content": [image_block, text_block]}]

    started = time.time()
    response = client.messages.create(
        model="claude-sonnet-4-20250514",
        max_tokens=700,
        system=PROMPT_SYSTEM,
        messages=conversation,
    )
    elapsed = time.time() - started

    prompt_text = response.content[0].text.strip()
    logger.info(f" Claude API call took {elapsed:.1f}s, prompt length={len(prompt_text)}")
    return prompt_text
def _ensure_result_binding(code):
    """Return *code*, right-stripped, with a trailing ``result = ...`` line if needed.

    A line is appended only when no line of the snippet starts with the name
    ``result``. The match is anchored to line starts and word-bounded, so a
    ``result = ...`` on the very first line is recognised while look-alike
    names such as ``results`` or ``result_x`` are not mistaken for it.
    The appended line binds ``result`` to the last column-0 variable whose name
    starts with ``solid``, or to plain ``solid`` when none is found.
    """
    adapted = code.rstrip()
    if re.search(r"^result\b", adapted, re.MULTILINE):
        return adapted
    solid_names = re.findall(r"^(solid\w*)\s*=", adapted, re.MULTILINE)
    target = solid_names[-1] if solid_names else "solid"
    return f"{adapted}\nresult = {target}"


def execute_code(code):
    """Run a CadQuery snippet and return ``(shape, adapted_code)``.

    Args:
        code: CadQuery source text. If it does not bind ``result`` itself, a
            ``result = <last solid* variable>`` line is appended.

    Returns:
        A tuple of the resulting shape (``result.val()`` when ``result`` has a
        ``val`` attribute, otherwise ``result`` itself) and the source that
        was actually executed.

    Raises:
        ValueError: if ``result`` is missing or None after execution, or if
            all three bounding-box extents are below 1e-6.
        Exception: anything raised by the executed snippet propagates.
    """
    import cadquery as cq

    adapted = _ensure_result_binding(code)

    # SECURITY: exec() runs dataset-supplied code with full interpreter
    # privileges. Only feed it sources you trust, or run this script inside a
    # sandbox/container.
    namespace = {"cq": cq, "cadquery": cq, "math": math}
    exec(adapted, namespace)

    result = namespace.get("result")
    if result is None:
        raise ValueError("No 'result' variable after execution")

    shape = result.val() if hasattr(result, "val") else result
    bb = shape.BoundingBox()
    if bb.xlen < 1e-6 and bb.ylen < 1e-6 and bb.zlen < 1e-6:
        raise ValueError("Degenerate shape (zero bbox)")
    return shape, adapted
def analyze_geometry(shape, code_info):
    """Summarise a shape's measured properties and construction counts.

    Args:
        shape: Object accepted by ``server.geometry.extract_properties`` that
            also provides ``BoundingBox()`` with ``xlen``/``ylen``/``zlen``.
        code_info: Mapping read for the ``extrudes``, ``unions``, ``cuts``,
            ``arcs`` and ``circles`` counts (each defaults to 0).

    Returns:
        A dict with ``bbox`` (extents rounded to 4 decimals), the measured
        properties under shortened keys, and the five construction counts.
    """
    from server.geometry import extract_properties

    props = extract_properties(shape)
    bounds = shape.BoundingBox()

    summary = {
        "bbox": [round(extent, 4) for extent in (bounds.xlen, bounds.ylen, bounds.zlen)],
    }

    # (output key, key in extracted properties, value used when it is absent)
    measured = (
        ("volume", "volume_mm3", 0),
        ("surface_area", "surface_area_mm2", 0),
        ("face_count", "face_count", 0),
        ("face_type_counts", "face_type_counts", {}),
        ("dominant_face_type", "dominant_face_type", ""),
        ("euler", "euler_characteristic", 2),
        ("shape_class", "shape_class", "COMPLEX_SOLID"),
        ("edge_count", "edge_count", 0),
        ("vertex_count", "vertex_count", 0),
        ("has_xy_symmetry", "has_xy_symmetry", False),
        ("has_xz_symmetry", "has_xz_symmetry", False),
        ("has_yz_symmetry", "has_yz_symmetry", False),
    )
    for out_key, prop_key, fallback in measured:
        summary[out_key] = props.get(prop_key, fallback)

    for op_key in ("extrudes", "unions", "cuts", "arcs", "circles"):
        summary[op_key] = code_info.get(op_key, 0)

    return summary
def difficulty_bin(label, score):
    """Map a difficulty label and complexity score to a bin number.

    ``"medium"`` yields 3-5, ``"hard"`` yields 5-7 and any other label yields
    7-9. Within a label the bin rises by one for every ``width`` points of
    score above ``origin`` and is clamped to the label's range.
    """
    if label == "medium":
        lowest, highest, origin, width = 3, 5, 10, 3
    elif label == "hard":
        lowest, highest, origin, width = 5, 7, 19, 6
    else:
        lowest, highest, origin, width = 7, 9, 35, 15

    stepped = lowest + (score - origin) // width
    return min(highest, max(lowest, stepped))
def generate_one_task(ds, info, task_num, dry_run=False):
    """Build one task from a dataset row and report the outcome.

    Steps: execute the row's CadQuery code, write ``reference_code.py``, run
    ``preprocess_from_code`` to produce the ground-truth artifacts, ask Claude
    for a natural-language prompt, then write ``task.json``.

    Args:
        ds: Indexable dataset; ``ds[idx]`` must provide the code at
            ``["texts"][0]["assistant"]`` and the image at ``["images"][0]``.
        info: Selection record with ``idx``, ``score`` and
            ``difficulty_label``; it is also passed to Claude as the
            construction-count mapping.
        task_num: Number embedded in the task id.
        dry_run: When true, nothing is written under ``TASKS_ROOT``.
            Intermediate artifacts go to a temporary directory that is removed
            afterwards, the generated prompt is logged in full, and
            ``task.json`` is not written. The Claude call is still made,
            since showing the prompt is the point of a dry run.

    Returns:
        A dict with ``task_id``, ``success`` and ``elapsed_s``. Failures add
        ``error`` prefixed with the failing stage (``exec``, ``gt`` or
        ``prompt``). Successes add ``prompt_preview``, ``volume`` and
        ``face_count``, plus ``dry_run: True`` for dry runs.
    """
    import tempfile

    t0 = time.time()
    idx = info["idx"]
    row = ds[idx]
    code = row["texts"][0]["assistant"]
    pil_image = row["images"][0]
    label = info["difficulty_label"]
    task_id = f"task_{task_num:03d}_hf_{idx}"

    def failure(stage, exc):
        # Uniform failure record so one bad example never aborts the batch.
        return {"task_id": task_id, "success": False, "error": f"{stage}: {exc}",
                "elapsed_s": round(time.time() - t0, 2)}

    logger.info(f"[{task_num}] Processing {task_id} (hf_idx={idx}, score={info['score']}, {label})")
    try:
        _shape, adapted_code = execute_code(code)
    except Exception as e:
        logger.error(f" EXEC FAIL: {e}")
        return failure("exec", e)
    logger.info(" Code executed OK")

    # Dry runs work in a throw-away directory so TASKS_ROOT stays untouched.
    scratch = tempfile.TemporaryDirectory(prefix=f"{task_id}_") if dry_run else None
    task_dir = Path(scratch.name) if scratch is not None else TASKS_ROOT / task_id
    try:
        task_dir.mkdir(parents=True, exist_ok=True)
        with open(task_dir / "reference_code.py", "w", encoding="utf-8") as f:
            f.write(adapted_code)

        try:
            from server.preprocessor import preprocess_from_code
            gt = preprocess_from_code(adapted_code, str(task_dir), task_id=task_id)
            logger.info(f" GT OK: vol={gt.get('volume_mm3')}, bbox={gt.get('bbox_mm')}")
            # Read inside the try: a missing or invalid ground_truth.json is a
            # ground-truth failure for this task, not a reason to crash.
            with open(task_dir / "ground_truth.json", encoding="utf-8") as f:
                gt_json = json.load(f)
        except Exception as e:
            logger.error(f" GT FAIL: {e}")
            logger.error(traceback.format_exc())
            return failure("gt", e)

        try:
            nl_prompt = call_claude_for_prompt(pil_image, gt_json, code, info)
        except Exception as e:
            logger.error(f" PROMPT FAIL: {e}")
            return failure("prompt", e)

        if dry_run:
            logger.info(f" DRY RUN prompt for {task_id}:\n{nl_prompt}")
        else:
            d_bin = difficulty_bin(label, info["score"])
            max_steps = 20 if label == "medium" else (25 if label == "hard" else 30)
            task_json = {
                "id": task_id,
                # ``or`` also covers a None/empty value, which would otherwise
                # break on .lower().
                "part_class": str(gt_json.get("dominant_face_type") or "complex").lower(),
                "difficulty_bin": d_bin,
                "max_steps": max_steps,
                "prompt": nl_prompt,
                "ground_truth_step": f"tasks/{task_id}/ground_truth.step",
                "ground_truth_json": f"tasks/{task_id}/ground_truth.json",
                "reference_code": f"tasks/{task_id}/reference_code.py",
                "source": "ThomasTheMaker/cadquery",
                "hf_index": idx,
                "complexity_score": info["score"],
                "difficulty_label": label,
            }
            with open(task_dir / "task.json", "w", encoding="utf-8") as f:
                json.dump(task_json, f, indent=2)

        elapsed = round(time.time() - t0, 2)
        logger.info(f" DONE {task_id} ({elapsed}s)")
        outcome = {"task_id": task_id, "success": True, "elapsed_s": elapsed,
                   "prompt_preview": nl_prompt[:150],
                   "volume": gt.get("volume_mm3"), "face_count": gt.get("face_count")}
        if dry_run:
            outcome["dry_run"] = True
        return outcome
    finally:
        if scratch is not None:
            scratch.cleanup()
def main():
    """Command-line entry point: generate tasks for the selected examples.

    Reads ``SELECTED_PATH``, loads the ``ThomasTheMaker/cadquery`` dataset,
    calls ``generate_one_task`` for each selected example with consecutive
    task numbers starting at ``--start-num``, prints a summary and writes the
    per-task results to ``thomasmaker_generation_report.json`` next to
    ``TASKS_ROOT``.

    Returns:
        0 when every task succeeded; 1 when the API key is missing or at
        least one task failed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--dry-run", action="store_true",
                        help="just show prompts")
    parser.add_argument("--limit", type=int, default=None,
                        help="process only the first N selected examples")
    parser.add_argument("--start-num", type=int, default=START_TASK_NUM,
                        help="number assigned to the first generated task")
    args = parser.parse_args()
    # A negative slice bound would silently drop items from the end instead.
    if args.limit is not None and args.limit < 0:
        parser.error("--limit must be non-negative")

    t0_total = time.time()
    if not ANTHROPIC_API_KEY:
        logger.error("ANTHROPIC_API_KEY not set")
        return 1

    with open(SELECTED_PATH, encoding="utf-8") as f:
        selected = json.load(f)
    logger.info(f"Loaded {len(selected)} selected examples")
    # ``is not None`` rather than truthiness so that ``--limit 0`` really
    # means "process nothing" instead of "process everything".
    if args.limit is not None:
        selected = selected[:args.limit]

    from datasets import load_dataset
    logger.info("Loading HF dataset...")
    ds = load_dataset("ThomasTheMaker/cadquery", split="train")
    logger.info(f"Loaded {len(ds)} rows")

    results = []
    success_count = 0
    fail_count = 0
    for i, info in enumerate(selected):
        r = generate_one_task(ds, info, args.start_num + i, dry_run=args.dry_run)
        results.append(r)
        if r.get("success"):
            success_count += 1
        else:
            fail_count += 1
        if (i + 1) % 5 == 0:
            logger.info(f"=== Progress: {i+1}/{len(selected)} (ok={success_count}, fail={fail_count}) ===")

    elapsed_total = time.time() - t0_total
    print("\n" + "=" * 80)
    print(f"DONE: {success_count}/{len(selected)} succeeded, {fail_count} failed")
    print(f"Total time: {elapsed_total:.1f}s ({elapsed_total/60:.1f}m)")

    report_path = TASKS_ROOT.parent / "thomasmaker_generation_report.json"
    with open(report_path, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)
    print(f"Report: {report_path}")
    return 0 if fail_count == 0 else 1


if __name__ == "__main__":
    sys.exit(main())