# Hosting-page residue kept for provenance:
# ddebree - "Upload folder using huggingface_hub" - commit f9306c2 (verified)
"""Demo dataset generation for trying the explorer locally."""
from __future__ import annotations
import json
from pathlib import Path
from PIL import Image, ImageDraw
def create_demo_dataset(output_dir: Path) -> Path:
    """Generate a tiny MathVision-like demo dataset under *output_dir*.

    Eight PNG images are rendered into ``output_dir / "images"`` (created
    together with any missing parents) and one JSON object per item is
    written, one per line with sorted keys, to ``output_dir / "demo.jsonl"``.

    Returns:
        The path of the written ``demo.jsonl`` file.
    """
    picture_dir = output_dir / "images"
    picture_dir.mkdir(parents=True, exist_ok=True)

    # One row per item:
    # (slug, renderer, question, answer, options, subject, level, solution).
    # The slug yields both the record id ("demo-<slug>") and the image file
    # name ("<slug>.png"), so the index and the rendered files stay in sync.
    catalog = (
        (
            "red-squares",
            _draw_red_squares,
            "How many red squares are visible?",
            "4",
            ["3", "4", "5"],
            "counting",
            1,
            "Count the four red square tiles.",
        ),
        (
            "red-squares-small",
            _draw_red_squares_small,
            "How many small red squares are visible?",
            "5",
            ["4", "5", "6"],
            "counting",
            2,
            "The red tiles form a group of five.",
        ),
        (
            "blue-triangles",
            _draw_blue_triangles,
            "How many blue triangles are visible?",
            "3",
            ["2", "3", "4"],
            "geometry",
            1,
            "There are three separate blue triangles.",
        ),
        (
            "blue-pyramids",
            _draw_blue_pyramids,
            "Which shape appears repeatedly?",
            "triangle",
            ["circle", "triangle", "square"],
            "geometry",
            2,
            "The repeated blue shapes are triangles.",
        ),
        (
            "red-grid",
            _draw_red_grid,
            "Which tile color dominates the grid?",
            "red",
            ["red", "blue", "green"],
            "pattern",
            2,
            "Most grid cells are red.",
        ),
        (
            "green-grid",
            _draw_green_grid,
            "Which tile color dominates this grid?",
            "green",
            ["red", "blue", "green"],
            "pattern",
            2,
            "Green appears in most grid cells.",
        ),
        (
            "number-line",
            _draw_number_line,
            "Which point is closest to 4?",
            "C",
            ["A", "B", "C"],
            "algebra",
            1,
            "Point C is drawn nearest to the tick labeled 4.",
        ),
        (
            "clock",
            _draw_clock,
            "Which hour does the short hand point to?",
            "3",
            ["2", "3", "4"],
            "measurement",
            1,
            "The shorter hand points toward 3.",
        ),
    )

    entries = [
        {
            "id": f"demo-{slug}",
            "question": question,
            "answer": answer,
            "image": f"images/{slug}.png",
            "options": options,
            "subject": subject,
            "level": level,
            "solution": solution,
        }
        for slug, _, question, answer, options, subject, level, solution in catalog
    ]

    # Render every image before the index is written, in catalog order.
    for slug, render, *_ in catalog:
        render(picture_dir / f"{slug}.png")

    index_path = output_dir / "demo.jsonl"
    with index_path.open("w", encoding="utf-8") as sink:
        sink.writelines(
            json.dumps(entry, sort_keys=True) + "\n" for entry in entries
        )
    return index_path
def _new_canvas() -> Image.Image:
    """Return a fresh 420x280 RGB image filled with the shared background color."""
    size = (420, 280)
    background = (248, 250, 252)
    return Image.new("RGB", size, color=background)
def _draw_red_squares(path: Path) -> None:
    """Draw four red squares in a two-by-two arrangement and save the image to *path*."""
    canvas = _new_canvas()
    pen = ImageDraw.Draw(canvas)
    extent = 58
    fill_red = (220, 38, 38)
    border_red = (127, 29, 29)
    # Rows outermost so squares are drawn left to right, top to bottom.
    for top in (60, 150):
        for left in (80, 170):
            pen.rectangle(
                (left, top, left + extent, top + extent),
                fill=fill_red,
                outline=border_red,
                width=3,
            )
    canvas.save(path)
def _draw_blue_triangles(path: Path) -> None:
    """Draw three identical blue triangles side by side and save the image to *path*."""
    canvas = _new_canvas()
    pen = ImageDraw.Draw(canvas)
    base_y, apex_y = 190, 80
    # Left base corners sit at x = 90, 190 and 290; each base spans 80 px
    # with the apex centered above it.
    for left in range(90, 291, 100):
        corners = [(left, base_y), (left + 40, apex_y), (left + 80, base_y)]
        pen.polygon(corners, fill=(37, 99, 235), outline=(30, 64, 175))
    canvas.save(path)
def _draw_red_squares_small(path: Path) -> None:
    """Draw five smaller red squares (three above, two below) and save to *path*."""
    canvas = _new_canvas()
    pen = ImageDraw.Draw(canvas)
    extent = 46
    top_row = [(78, 54), (148, 54), (218, 54)]
    bottom_row = [(112, 134), (184, 134)]
    for left, top in top_row + bottom_row:
        box = (left, top, left + extent, top + extent)
        pen.rectangle(box, fill=(239, 68, 68), outline=(127, 29, 29), width=3)
    canvas.save(path)
def _draw_blue_pyramids(path: Path) -> None:
    """Draw three blue triangles of increasing size on a shared baseline and save to *path*."""
    canvas = _new_canvas()
    pen = ImageDraw.Draw(canvas)
    # Each entry is (left base x, baseline y, base width); the height of a
    # triangle equals its base width.
    shapes = ((82, 178, 52), (162, 178, 68), (262, 178, 82))
    for left, baseline, span in shapes:
        bottom_left = (left, baseline)
        apex = (left + span // 2, baseline - span)
        bottom_right = (left + span, baseline)
        pen.polygon(
            [bottom_left, apex, bottom_right],
            fill=(59, 130, 246),
            outline=(30, 64, 175),
        )
    canvas.save(path)
def _draw_red_grid(path: Path) -> None:
    """Draw a 2x3 grid of tiles in which four of the six are red, then save to *path*."""
    canvas = _new_canvas()
    pen = ImageDraw.Draw(canvas)
    red = (220, 38, 38)
    green = (22, 163, 74)
    blue = (37, 99, 235)
    layout = (
        (red, red, green),
        (red, blue, red),
    )
    pitch, tile = 82, 64
    for row_index, row in enumerate(layout):
        top = 64 + row_index * pitch
        for column_index, tile_color in enumerate(row):
            left = 92 + column_index * pitch
            pen.rectangle(
                (left, top, left + tile, top + tile),
                fill=tile_color,
                outline=(15, 23, 42),
                width=2,
            )
    canvas.save(path)
def _draw_green_grid(path: Path) -> None:
    """Draw a 2x3 grid of tiles in which four of the six are green, then save to *path*."""
    canvas = _new_canvas()
    pen = ImageDraw.Draw(canvas)
    green = (22, 163, 74)
    red = (220, 38, 38)
    blue = (37, 99, 235)
    layout = (
        (green, green, red),
        (green, blue, green),
    )
    pitch, tile = 82, 64
    for row_index, row in enumerate(layout):
        top = 64 + row_index * pitch
        for column_index, tile_color in enumerate(row):
            left = 92 + column_index * pitch
            pen.rectangle(
                (left, top, left + tile, top + tile),
                fill=tile_color,
                outline=(15, 23, 42),
                width=2,
            )
    canvas.save(path)
def _draw_number_line(path: Path) -> None:
    """Draw a number line labeled 0-5 with three colored points A, B and C, then save to *path*."""
    canvas = _new_canvas()
    pen = ImageDraw.Draw(canvas)
    ink = (15, 23, 42)

    # Horizontal axis.
    pen.line((62, 148, 358, 148), fill=ink, width=4)

    # Six ticks spaced 59 px apart starting at x = 62, labeled 0..5.
    for value, tick_x in enumerate(range(62, 62 + 6 * 59, 59)):
        pen.line((tick_x, 134, tick_x, 162), fill=ink, width=3)
        pen.text((tick_x - 5, 170), str(value), fill=ink)

    # Labeled dots drawn above the axis.
    markers = (
        ("A", 174, (37, 99, 235)),
        ("B", 246, (22, 163, 74)),
        ("C", 296, (220, 38, 38)),
    )
    radius = 9
    for name, dot_x, dot_color in markers:
        pen.ellipse((dot_x - radius, 108, dot_x + radius, 126), fill=dot_color)
        pen.text((dot_x - 5, 86), name, fill=ink)
    canvas.save(path)
def _draw_clock(path: Path) -> None:
    """Draw a clock face showing three o'clock and save the image to *path*.

    The face carries the labels 12, 3, 6 and 9. The thick red hour hand points
    toward 3 and the thinner blue minute hand points toward 12.
    """
    image = _new_canvas()
    draw = ImageDraw.Draw(image)
    center_x, center_y = 210, 140
    ink = (15, 23, 42)

    draw.ellipse((100, 30, 320, 250), fill=(255, 255, 255), outline=ink, width=4)
    for label, xy in [("12", (199, 48)), ("3", (290, 132)), ("6", (205, 220)), ("9", (120, 132))]:
        draw.text(xy, label, fill=ink)

    # The demo question asks which hour the "short hand" points to, so the
    # hour hand has to be visibly shorter than the minute hand. Both hands
    # used to be 72 px long, which made the image contradict the question.
    hour_hand_length = 46
    minute_hand_length = 72
    draw.line(
        (center_x, center_y, center_x + hour_hand_length, center_y),
        fill=(220, 38, 38),
        width=6,
    )
    draw.line(
        (center_x, center_y, center_x, center_y - minute_hand_length),
        fill=(37, 99, 235),
        width=4,
    )

    # Hub drawn last so it covers the inner ends of both hands.
    draw.ellipse((center_x - 8, center_y - 8, center_x + 8, center_y + 8), fill=ink)
    image.save(path)