| """ |
| tutor/visual_grounding.py |
| Renders a colourful counting stimulus as an RGB numpy array for gr.Image. |
| Robust: works with or without system fonts, never returns None for valid counts. |
| """ |
| from __future__ import annotations |
| import math |
| import numpy as np |
| from typing import Optional |
|
|
| try: |
| from PIL import Image, ImageDraw, ImageFont |
| _PIL = True |
| except ImportError: |
| _PIL = False |
|
|
| |
# RGB colours for the dots; dot i is painted with entry i modulo the
# palette length, so the colours repeat once the list is exhausted.
_PALETTE = [
    (231, 76, 60),
    (52, 152, 219),
    (46, 204, 113),
    (243, 156, 18),
    (155, 89, 182),
    (26, 188, 156),
    (233, 30, 153),
    (241, 196, 15),
    (52, 73, 94),
    (230, 126, 34),
]

# Default canvas size in pixels (width, height) and its RGB background.
_CANVAS_W, _CANVAS_H = 480, 320
_BG = (250, 250, 255)
|
|
|
|
def _get_font(size: int) -> "ImageFont.ImageFont":
    """Return a font at *size* px, preferring a bold system TrueType face.

    The well-known font locations for Linux, macOS and Windows listed below
    are tried in order and the first one that loads wins.  If none loads,
    Pillow's bundled default font is returned, scaled to *size* when the
    installed Pillow supports that, otherwise at its fixed built-in size.

    Args:
        size: Requested font size in pixels.

    Returns:
        A Pillow font object usable with ``ImageDraw.text``.
    """
    candidates = (
        "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
        "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
        "/usr/share/fonts/truetype/freefont/FreeSansBold.ttf",
        "/usr/share/fonts/truetype/ubuntu/Ubuntu-B.ttf",
        "/System/Library/Fonts/Helvetica.ttc",
        "C:/Windows/Fonts/arialbd.ttf",
    )
    for path in candidates:
        try:
            return ImageFont.truetype(path, size)
        except Exception:
            # Deliberate best-effort probe: whatever went wrong with this
            # candidate, the right reaction is to try the next one.
            continue

    # Newer Pillow releases can scale the bundled default font; honour the
    # requested size there so titles do not shrink to the small built-in
    # font.  Older releases reject the keyword (TypeError), and a build
    # that cannot load the scalable font may raise the other errors.
    try:
        return ImageFont.load_default(size=size)
    except (TypeError, ValueError, OSError, ImportError):
        return ImageFont.load_default()
|
|
|
|
def _draw_text_centered(draw, text: str, y: int, font, color=(40, 40, 40),
                        canvas_w: int = _CANVAS_W):
    """Render *text* so that it is horizontally centred on the canvas.

    Args:
        draw: Drawing object providing ``text`` and either ``textbbox`` or
            the older ``textsize``.
        text: String to render.
        y: Vertical pixel position passed straight to ``draw.text``.
        font: Font used both for measuring and for drawing.
        color: Fill colour of the text.
        canvas_w: Width in pixels of the area to centre within.
    """
    try:
        left, _top, right, _bottom = draw.textbbox((0, 0), text, font=font)
    except AttributeError:
        # No textbbox() on this drawing object: measure with textsize().
        text_w = draw.textsize(text, font=font)[0]
    else:
        text_w = right - left

    origin = ((canvas_w - text_w) // 2, y)
    draw.text(origin, text, fill=color, font=font)
|
|
|
|
def _text_width(draw, text: str, font) -> int:
    """Return the rendered pixel width of *text*, on old or new Pillow."""
    try:
        left, _top, right, _bottom = draw.textbbox((0, 0), text, font=font)
    except AttributeError:
        # Drawing object without textbbox(): use the older textsize() API.
        return draw.textsize(text, font=font)[0]
    return right - left


def render_counting_stimulus(
    count: int,
    label: str = "โ",
    canvas_w: int = _CANVAS_W,
    canvas_h: int = _CANVAS_H,
) -> np.ndarray:
    """Render *count* colourful dots under a title prompt.

    Args:
        count: Number of dots to draw.  Coerced with ``int()`` and clamped
            to the range 1..20; a value that cannot be converted is treated
            as 1 instead of raising.
        label: Currently unused by the renderer; kept so existing callers
            keep working.
            NOTE(review): the default glyph looks mis-encoded (possibly a
            round bullet originally) - confirm against the source file.
        canvas_w: Canvas width in pixels.
        canvas_h: Canvas height in pixels.

    Returns:
        An RGB ``uint8`` array of shape (canvas_h, canvas_w, 3).  When
        Pillow is not installed this is a uniform grey canvas.

    The function is meant never to raise for any *count*.  Canvas
    dimensions are assumed to be positive ints large enough to hold the
    4 px border inset - TODO confirm callers never pass tiny canvases.
    """
    if not _PIL:
        # Pillow unavailable: still hand back an image-shaped array.
        return np.full((canvas_h, canvas_w, 3), 200, dtype=np.uint8)

    # Honour the "never raises" contract for sloppy inputs such as None,
    # "3", 3.0 or inf before clamping to the supported range.
    try:
        count = int(count)
    except (TypeError, ValueError, OverflowError):
        count = 1
    count = max(1, min(20, count))

    img = Image.new("RGB", (canvas_w, canvas_h), _BG)
    draw = ImageDraw.Draw(img)

    # Decorative border.  rounded_rectangle() is missing on older Pillow
    # releases; fall back to a square-cornered border rather than crash,
    # mirroring the textbbox()/textsize() fallback used for text.
    border_box = [4, 4, canvas_w - 4, canvas_h - 4]
    border_color = (180, 180, 220)
    try:
        draw.rounded_rectangle(border_box, radius=24,
                               outline=border_color, width=3)
    except AttributeError:
        draw.rectangle(border_box, outline=border_color, width=3)

    # Title prompt.
    # NOTE(review): the trailing character of the title looks mis-encoded
    # (probably an emoji originally) - confirm against the source file.
    _draw_text_centered(draw, "Count the dots ๐", 14, _get_font(26),
                        color=(50, 50, 160), canvas_w=canvas_w)

    # Region below the title in which the dot grid is laid out.
    area_top = 60
    area_bottom = canvas_h - 20
    area_left = 20
    area_right = canvas_w - 20
    area_w = area_right - area_left
    area_h = area_bottom - area_top

    # At most five dots per row; add rows as needed.
    cols = min(count, 5)
    rows = math.ceil(count / cols)
    cell_w = area_w // cols
    cell_h = area_h // rows

    # Largest radius that leaves a 6 px margin inside a cell, capped at 36
    # and never below 10 so dots stay visible on cramped canvases.
    dot_r = max(10, min(36, cell_w // 2 - 6, cell_h // 2 - 6))

    # Loop-invariant geometry for the small highlight on each dot.
    hl_r = max(3, dot_r // 4)
    hl_offset = dot_r // 3
    shadow_color = (180, 180, 200)

    for i in range(count):
        row, col = divmod(i, cols)
        cx = area_left + col * cell_w + cell_w // 2
        cy = area_top + row * cell_h + cell_h // 2
        color = _PALETTE[i % len(_PALETTE)]

        # Drop shadow, offset 3 px down and to the right.
        draw.ellipse(
            [cx - dot_r + 3, cy - dot_r + 3,
             cx + dot_r + 3, cy + dot_r + 3],
            fill=shadow_color)

        # The dot itself.
        draw.ellipse(
            [cx - dot_r, cy - dot_r,
             cx + dot_r, cy + dot_r],
            fill=color)

        # Highlight towards the upper-left.  An RGB canvas has no alpha
        # channel, so the intended 160/255 white overlay is pre-blended
        # with the dot colour instead of being passed as an RGBA fill.
        highlight = tuple(c + (255 - c) * 160 // 255 for c in color)
        hx, hy = cx - hl_offset, cy - hl_offset
        draw.ellipse(
            [hx - hl_r, hy - hl_r, hx + hl_r, hy + hl_r],
            fill=highlight)

    # Faint answer hint, right-aligned in the bottom corner.
    noun = "dot" if count == 1 else "dots"
    hint = f"({count} {noun})"
    font_hint = _get_font(18)
    hint_w = _text_width(draw, hint, font_hint)
    draw.text((canvas_w - hint_w - 14, canvas_h - 26), hint,
              fill=(180, 180, 200), font=font_hint)

    return np.array(img)
|
|